Java 类java.io.UTFDataFormatException 实例源码

项目:megaphone    文件:ByteBufUtils.java   
/**
 * Decodes the readable bytes of {@code buf} into a {@link String} using {@code charset}.
 *
 * <p>US-ASCII and UTF-8 are handed to {@code Utf8Reader.readUtf8}; every other charset is
 * decoded from the buffer's NIO views through {@code byteBuffersToString}.
 *
 * @param buf the buffer whose readable bytes are decoded
 * @param charset the charset the bytes are encoded in
 * @return the decoded string
 */
public static String byteBuf2String(ByteBuf buf, Charset charset) throws UTFDataFormatException, IndexOutOfBoundsException, CharacterCodingException {

    final int readable = buf.readableBytes();

    if (charset.equals(StandardCharsets.US_ASCII)) {
        return Utf8Reader.readUtf8(buf, readable);
    }

    if (!charset.equals(StandardCharsets.UTF_8)) {
        return byteBuffersToString(buf.nioBuffers(), charset);
    }

    try {
        // First attempt works on a duplicate so the original buffer is untouched if the
        // size estimate (1.4 x the byte count) turns out to be too small.
        return Utf8Reader.readUtf8(buf.duplicate(), (int) (readable * 1.4));
    } catch (IndexOutOfBoundsException estimateTooSmall) {
        // Second attempt: retry on the real buffer with three times the byte count.
        return Utf8Reader.readUtf8(buf, readable * 3);
    }
}
项目:GitHub    文件:Mutf8.java   
/**
 * Computes how many bytes the characters of {@code s} occupy in modified UTF-8.
 *
 * @param s the string to measure
 * @param shortLength when {@code true}, fail as soon as the running total exceeds 65535
 * @return the number of bytes needed for the characters of {@code s}
 * @throws UTFDataFormatException if {@code shortLength} is set and the total exceeds 65535
 */
public static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
    long total = 0;
    for (int index = 0, end = s.length(); index < end; index++) {
        final char c = s.charAt(index);
        final int width;
        if (c == 0 || c > 127) {
            // U+0000 is encoded with two bytes; anything above 2047 needs three.
            width = (c <= 2047) ? 2 : 3;
        } else {
            width = 1;
        }
        total += width;
        if (shortLength && total > 65535) {
            throw new UTFDataFormatException("String more than 65535 UTF bytes long");
        }
    }
    return total;
}
项目:OpenDiabetes    文件:DataOutputStream.java   
/**
 * Writes {@code str} as a length-prefixed UTF byte sequence.
 *
 * @param str the string to write
 * @throws UTFDataFormatException if the string has more than 0xffff chars or its encoded
 *         size (as reported by {@code StringConverter.getUTFSize}) exceeds 0xffff
 * @throws IOException if the underlying write fails
 */
public void writeUTF(String str) throws IOException {

    // Guard on the char count first, before computing the encoded size.
    if (str.length() > 0xffff) {
        throw new UTFDataFormatException();
    }

    final int encodedSize = StringConverter.getUTFSize(str);

    if (encodedSize > 0xffff) {
        throw new UTFDataFormatException();
    }

    // The size prefix is emitted through writeChar.
    writeChar(encodedSize);

    final HsqlByteArrayOutputStream encoded = new HsqlByteArrayOutputStream(encodedSize);

    StringConverter.stringToUTFBytes(str, encoded);
    write(encoded.getBuffer(), 0, encoded.size());
}
项目:atlas    文件:Dex.java   
/**
 * Reads an offset with {@code readInt()}, decodes the string stored at that offset via
 * {@code Mutf8.decode}, and restores the position and limit of {@code data} afterwards.
 *
 * @return the decoded string
 * @throws DexException if the decoded length differs from the declared length, or the
 *         bytes are rejected with a {@link UTFDataFormatException}
 */
public String readString() {
    final int stringOffset = readInt();
    final int positionBefore = data.position();
    final int limitBefore = data.limit();
    data.position(stringOffset);
    data.limit(data.capacity());
    try {
        final int declaredLength = readUleb128();
        final String decoded = Mutf8.decode(this, new char[declaredLength]);
        final int actualLength = decoded.length();
        if (actualLength == declaredLength) {
            return decoded;
        }
        throw new DexException("Declared length " + declaredLength
                + " doesn't match decoded length of " + actualLength);
    } catch (UTFDataFormatException e) {
        throw new DexException(e);
    } finally {
        // Always put the cursor back, whether decoding succeeded or not.
        data.position(positionBefore);
        data.limit(limitBefore);
    }
}
项目:atlas    文件:Mutf8.java   
/**
 * Counts the bytes that the characters of {@code s} take up in modified UTF-8.
 *
 * @param s the string to measure
 * @param shortLength if {@code true}, abort once the running total goes above 65535
 * @return the byte count for the characters of {@code s}
 * @throws UTFDataFormatException if {@code shortLength} is set and the total exceeds 65535
 */
private static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
    long byteCount = 0;
    final int n = s.length();
    int pos = 0;
    while (pos < n) {
        final char current = s.charAt(pos++);
        if (current > 2047) {
            byteCount += 3;
        } else if (current > 127 || current == 0) {
            // Two-byte range; U+0000 is encoded with two bytes as well.
            byteCount += 2;
        } else {
            byteCount += 1;
        }
        if (shortLength && byteCount > 65535) {
            throw new UTFDataFormatException("String more than 65535 UTF bytes long");
        }
    }
    return byteCount;
}
项目:atlas    文件:Dex.java   
/**
 * Reads an offset via {@code readInt()}, jumps {@code data} to it, decodes the string found
 * there with {@code Mutf8.decode}, and then restores the previous position and limit.
 *
 * @return the decoded string
 * @throws DexException2 if the decoded length does not match the declared length, or a
 *         {@link UTFDataFormatException} is raised while decoding
 */
public String readString() {
    final int target = readInt();
    final int restorePosition = data.position();
    final int restoreLimit = data.limit();
    data.position(target);
    data.limit(data.capacity());
    try {
        final int expected = readUleb128();
        final char[] scratch = new char[expected];
        final String text = Mutf8.decode(this, scratch);
        if (text.length() != expected) {
            final String problem = "Declared length " + expected
                    + " doesn't match decoded length of " + text.length();
            throw new DexException2(problem);
        }
        return text;
    } catch (UTFDataFormatException e) {
        throw new DexException2(e);
    } finally {
        data.position(restorePosition);
        data.limit(restoreLimit);
    }
}
项目:atlas    文件:Mutf8.java   
/**
 * Computes how many bytes the characters of {@code s} occupy in modified UTF-8.
 *
 * @param s the string to measure
 * @param shortLength when {@code true}, fail as soon as the running total exceeds 65535
 * @return the number of bytes needed for the characters of {@code s}
 * @throws UTFDataFormatException if {@code shortLength} is set and the total exceeds 65535
 */
private static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
    long total = 0;
    for (int index = 0, end = s.length(); index < end; index++) {
        final char c = s.charAt(index);
        final int width;
        if (c == 0 || c > 127) {
            // U+0000 is encoded with two bytes; anything above 2047 needs three.
            width = (c <= 2047) ? 2 : 3;
        } else {
            width = 1;
        }
        total += width;
        if (shortLength && total > 65535) {
            throw new UTFDataFormatException("String more than 65535 UTF bytes long");
        }
    }
    return total;
}
项目:javaide    文件:Dex.java   
/**
 * Reads an offset with {@code readInt()}, decodes the string stored at that offset via
 * {@code Mutf8.decode}, and restores the position and limit of {@code data} afterwards.
 *
 * @return the decoded string
 * @throws DexException if the decoded length differs from the declared length, or the
 *         bytes are rejected with a {@link UTFDataFormatException}
 */
public String readString() {
    final int stringOffset = readInt();
    final int positionBefore = data.position();
    final int limitBefore = data.limit();
    data.position(stringOffset);
    data.limit(data.capacity());
    try {
        final int declaredLength = readUleb128();
        final String decoded = Mutf8.decode(this, new char[declaredLength]);
        final int actualLength = decoded.length();
        if (actualLength == declaredLength) {
            return decoded;
        }
        throw new DexException("Declared length " + declaredLength
                + " doesn't match decoded length of " + actualLength);
    } catch (UTFDataFormatException e) {
        throw new DexException(e);
    } finally {
        // Always put the cursor back, whether decoding succeeded or not.
        data.position(positionBefore);
        data.limit(limitBefore);
    }
}
项目:javaide    文件:Mutf8.java   
/**
 * Counts the bytes that the characters of {@code s} take up in modified UTF-8.
 *
 * @param s the string to measure
 * @param shortLength if {@code true}, abort once the running total goes above 65535
 * @return the byte count for the characters of {@code s}
 * @throws UTFDataFormatException if {@code shortLength} is set and the total exceeds 65535
 */
private static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
    long byteCount = 0;
    final int n = s.length();
    int pos = 0;
    while (pos < n) {
        final char current = s.charAt(pos++);
        if (current > 2047) {
            byteCount += 3;
        } else if (current > 127 || current == 0) {
            // Two-byte range; U+0000 is encoded with two bytes as well.
            byteCount += 2;
        } else {
            byteCount += 1;
        }
        if (shortLength && byteCount > 65535) {
            throw new UTFDataFormatException("String more than 65535 UTF bytes long");
        }
    }
    return byteCount;
}
项目:dev-courses    文件:DataOutputStream.java   
/**
 * Emits {@code str} as a size prefix followed by its UTF bytes.
 *
 * @param str the string to write
 * @throws UTFDataFormatException if {@code str} is longer than 0xffff chars, or
 *         {@code StringConverter.getUTFSize} reports more than 0xffff bytes
 * @throws IOException if writing to the stream fails
 */
public void writeUTF(String str) throws IOException {

    final int charCount = str.length();
    if (charCount > 0xffff) {
        throw new UTFDataFormatException();
    }

    final int utfSize = StringConverter.getUTFSize(str);
    if (utfSize > 0xffff) {
        throw new UTFDataFormatException();
    }

    // Size prefix goes out via writeChar, then the encoded bytes follow.
    writeChar(utfSize);

    final HsqlByteArrayOutputStream scratch = new HsqlByteArrayOutputStream(utfSize);
    StringConverter.stringToUTFBytes(str, scratch);

    write(scratch.getBuffer(), 0, scratch.size());
}
项目:monarch    文件:MsgStreamer.java   
/**
 * Used when we know the max size will fit in the current buffer.
 *
 * <p>Reserves two bytes for the length, encodes the first {@code strlen} chars of
 * {@code str} into {@code this.buffer}, then back-fills the reserved slot with the number
 * of bytes written.
 *
 * @param str the string to encode
 * @param strlen the number of chars of {@code str} to encode
 * @throws UTFDataFormatException if more than 65535 bytes were produced; the buffer
 *         position is rewound to where it was on entry before throwing
 */
private final void writeQuickFullUTF(String str, int strlen) throws IOException {
  final int lengthSlot = this.buffer.position();
  final int bodyStart = lengthSlot + 2;
  // Leave room for the length; it is filled in once the body size is known.
  this.buffer.position(bodyStart);
  for (int idx = 0; idx < strlen; idx++) {
    final int ch = str.charAt(idx);
    if (ch > 0x07FF) {
      // three-byte form
      this.buffer.put((byte) (0xE0 | ((ch >> 12) & 0x0F)));
      this.buffer.put((byte) (0x80 | ((ch >> 6) & 0x3F)));
      this.buffer.put((byte) (0x80 | (ch & 0x3F)));
    } else if (ch < 0x0001 || ch > 0x007F) {
      // two-byte form (also used for U+0000)
      this.buffer.put((byte) (0xC0 | ((ch >> 6) & 0x1F)));
      this.buffer.put((byte) (0x80 | (ch & 0x3F)));
    } else {
      this.buffer.put((byte) ch);
    }
  }
  final int encodedLen = this.buffer.position() - bodyStart;
  if (encodedLen > 65535) {
    // act as if we wrote nothing to this buffer
    this.buffer.position(lengthSlot);
    throw new UTFDataFormatException();
  }
  this.buffer.putShort(lengthSlot, (short) encodedLen);
}
项目:boohee_v5.6    文件:Mutf8.java   
public static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
    long result = 0;
    int length = s.length();
    int i = 0;
    while (i < length) {
        char ch = s.charAt(i);
        if (ch != '\u0000' && ch <= '') {
            result++;
        } else if (ch <= '߿') {
            result += 2;
        } else {
            result += 3;
        }
        if (!shortLength || result <= 65535) {
            i++;
        } else {
            throw new UTFDataFormatException("String more than 65535 UTF bytes long");
        }
    }
    return result;
}
项目:multidex-maker    文件:Dex.java   
/**
 * Reads an offset via {@code readInt()}, jumps {@code data} to it, decodes the string found
 * there with {@code Mutf8.decode}, and then restores the previous position and limit.
 *
 * @return the decoded string
 * @throws DexException if the decoded length does not match the declared length, or a
 *         {@link UTFDataFormatException} is raised while decoding
 */
public String readString() {
    final int target = readInt();
    final int restorePosition = data.position();
    final int restoreLimit = data.limit();
    data.position(target);
    data.limit(data.capacity());
    try {
        final int expected = readUleb128();
        final char[] scratch = new char[expected];
        final String text = Mutf8.decode(this, scratch);
        if (text.length() != expected) {
            final String problem = "Declared length " + expected
                    + " doesn't match decoded length of " + text.length();
            throw new DexException(problem);
        }
        return text;
    } catch (UTFDataFormatException e) {
        throw new DexException(e);
    } finally {
        data.position(restorePosition);
        data.limit(restoreLimit);
    }
}
项目:multidex-maker    文件:Mutf8.java   
/**
 * Computes how many bytes the characters of {@code s} occupy in modified UTF-8.
 *
 * @param s the string to measure
 * @param shortLength when {@code true}, fail as soon as the running total exceeds 65535
 * @return the number of bytes needed for the characters of {@code s}
 * @throws UTFDataFormatException if {@code shortLength} is set and the total exceeds 65535
 */
private static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
    long total = 0;
    for (int index = 0, end = s.length(); index < end; index++) {
        final char c = s.charAt(index);
        final int width;
        if (c == 0 || c > 127) {
            // U+0000 is encoded with two bytes; anything above 2047 needs three.
            width = (c <= 2047) ? 2 : 3;
        } else {
            width = 1;
        }
        total += width;
        if (shortLength && total > 65535) {
            throw new UTFDataFormatException("String more than 65535 UTF bytes long");
        }
    }
    return total;
}
项目:gemfirexd-oss    文件:UTF8UtilTest.java   
/**
 * Verifies that <code>skipFully</code> raises an exception when the stream
 * contains a UTF-8 encoding error.
 *
 * @throws IOException if the test fails for some unexpected reason
 */
public void testSkipFullyOnInvalidStreamCJK()
        throws IOException {
    final int numChars = 10;
    final LoopingAlphabetReader cjkReader =
            new LoopingAlphabetReader(numChars, CharAlphabet.cjkSubset());
    final InputStream stream =
            new ReaderToUTF8Stream(cjkReader, numChars, 0, "ignored-test-type");
    // Step over the encoded length that ReaderToUTF8Stream prepends.
    stream.skip(2L);
    // One extra byte puts the stream in the middle of a character, which is the UTF error.
    stream.skip(1L);
    try {
        UTF8Util.skipFully(stream, numChars);
        fail("Should have failed because of UTF error.");
    } catch (UTFDataFormatException expected) {
        // This is the outcome the test is looking for.
    }
}
项目:gemfirexd-oss    文件:UTF8UtilTest.java   
/**
 * Shows that skipping over a badly encoded character sequence succeeds,
 * because skipping does not check the stream for well-formedness, while
 * decoding the same bytes with {@code readUTF} fails.
 */
public void testSkippingInvalidEncodingWorks()
        throws IOException {
    // Three valid characters plus one three-byte sequence that is missing its
    // last byte (0xef = 11101111, 0xb8 = 10111000). Skipping (incorrectly)
    // treats this as three characters: 'a' - some three byte character - 'a'.
    final byte[] raw = {'a', (byte)0xef, (byte)0xb8, 'a', 'a'};
    final byte[] lengthPrefixed =
        {0x0, 0x5, 'a', (byte)0xef, (byte)0xb8, 'a', 'a'};

    // Not actually correct behaviour, but it works at present.
    final InputStream rawStream = new ByteArrayInputStream(raw);
    UTF8Util.skipFully(rawStream, 3);

    // Decoding the same bytes must be rejected.
    final DataInputStream strictReader =
            new DataInputStream(new ByteArrayInputStream(lengthPrefixed));
    try {
        strictReader.readUTF();
        fail("UTF-8 expected to be invalid, read should fail");
    } catch (UTFDataFormatException expected) {
        // Expected: the UTF-8 encoding is invalid.
    }
}
项目:cassandra-sstable-tools    文件:PurgeStatisticBackend.java   
/**
 * Accounts for a {@code writeUTF} call without emitting any bytes: computes the modified
 * UTF-8 size of {@code str} and advances {@code position} by the size of the record.
 *
 * <p>A {@code writeUTF} record is a 2-byte length prefix followed by the encoded payload,
 * so the advance is {@code utfCount + 2}. NOTE(review): this assumes {@code position}
 * counts bytes; its declaration is not visible here, so confirm.
 *
 * @param str the string whose encoded size is accounted for
 * @throws UTFDataFormatException if the encoded payload would exceed 65535 bytes
 */
@Override
public void writeUTF(String str) throws IOException {
    int utfCount = 0;
    final int length = str.length();
    for (int i = 0; i < length; i++) {
        final int charValue = str.charAt(i);
        if (charValue > 0 && charValue <= 127) {
            utfCount++;
        } else if (charValue <= 2047) {
            // Two-byte range; U+0000 lands here too.
            utfCount += 2;
        } else {
            utfCount += 3;
        }
    }
    if (utfCount > 65535) {
        throw new UTFDataFormatException(); //$NON-NLS-1$
    }
    // 2-byte length prefix + payload bytes.
    position += utfCount + 2;
}
项目:cassandra-sstable-tools    文件:PurgeStatisticBackend.java   
/**
 * Accounts for a {@code writeUTF} call without emitting any bytes: computes the modified
 * UTF-8 size of {@code str} and advances {@code position} by the size of the record.
 *
 * <p>A {@code writeUTF} record is a 2-byte length prefix followed by the encoded payload,
 * so the advance is {@code utfCount + 2}. NOTE(review): this assumes {@code position}
 * counts bytes; its declaration is not visible here, so confirm.
 *
 * @param str the string whose encoded size is accounted for
 * @throws UTFDataFormatException if the encoded payload would exceed 65535 bytes
 */
@Override
public void writeUTF(String str) throws IOException {
    int utfCount = 0;
    final int length = str.length();
    for (int i = 0; i < length; i++) {
        final int charValue = str.charAt(i);
        if (charValue > 0 && charValue <= 127) {
            utfCount++;
        } else if (charValue <= 2047) {
            // Two-byte range; U+0000 lands here too.
            utfCount += 2;
        } else {
            utfCount += 3;
        }
    }
    if (utfCount > 65535) {
        throw new UTFDataFormatException(); //$NON-NLS-1$
    }
    // 2-byte length prefix + payload bytes.
    position += utfCount + 2;
}
项目:cassandra-sstable-tools    文件:PurgeStatisticBackend.java   
/**
 * Accounts for a {@code writeUTF} call without emitting any bytes: computes the modified
 * UTF-8 size of {@code str} and advances {@code position} by the size of the record.
 *
 * <p>A {@code writeUTF} record is a 2-byte length prefix followed by the encoded payload,
 * so the advance is {@code utfCount + 2}. NOTE(review): this assumes {@code position}
 * counts bytes; its declaration is not visible here, so confirm.
 *
 * @param str the string whose encoded size is accounted for
 * @throws UTFDataFormatException if the encoded payload would exceed 65535 bytes
 */
@Override
public void writeUTF(String str) throws IOException {
    int utfCount = 0;
    final int length = str.length();
    for (int i = 0; i < length; i++) {
        final int charValue = str.charAt(i);
        if (charValue > 0 && charValue <= 127) {
            utfCount++;
        } else if (charValue <= 2047) {
            // Two-byte range; U+0000 lands here too.
            utfCount += 2;
        } else {
            utfCount += 3;
        }
    }
    if (utfCount > 65535) {
        throw new UTFDataFormatException(); //$NON-NLS-1$
    }
    // 2-byte length prefix + payload bytes.
    position += utfCount + 2;
}
项目:Dexer    文件:UTF8.java   
/**
 * Counts the bytes that the characters of {@code s} take up in modified UTF-8.
 *
 * @param s the string to measure
 * @param shortLength if {@code true}, abort once the running total goes above 65535
 * @return the byte count for the characters of {@code s}
 * @throws UTFDataFormatException if {@code shortLength} is set and the total exceeds 65535
 */
private static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
    long byteCount = 0;
    final int n = s.length();
    int pos = 0;
    while (pos < n) {
        final char current = s.charAt(pos++);
        if (current > 2047) {
            byteCount += 3;
        } else if (current > 127 || current == 0) {
            // Two-byte range; U+0000 is encoded with two bytes as well.
            byteCount += 2;
        } else {
            byteCount += 1;
        }
        if (shortLength && byteCount > 65535) {
            throw new UTFDataFormatException("String more than 65535 UTF bytes long");
        }
    }
    return byteCount;
}
项目:aapt    文件:Dex.java   
/**
 * Reads an offset with {@code readInt()}, decodes the string stored at that offset via
 * {@code Mutf8.decode}, and restores the position and limit of {@code data} afterwards.
 *
 * @return the decoded string
 * @throws DexException if the decoded length differs from the declared length, or the
 *         bytes are rejected with a {@link UTFDataFormatException}
 */
public String readString() {
    final int stringOffset = readInt();
    final int positionBefore = data.position();
    final int limitBefore = data.limit();
    data.position(stringOffset);
    data.limit(data.capacity());
    try {
        final int declaredLength = readUleb128();
        final String decoded = Mutf8.decode(this, new char[declaredLength]);
        final int actualLength = decoded.length();
        if (actualLength == declaredLength) {
            return decoded;
        }
        throw new DexException("Declared length " + declaredLength
                + " doesn't match decoded length of " + actualLength);
    } catch (UTFDataFormatException e) {
        throw new DexException(e);
    } finally {
        // Always put the cursor back, whether decoding succeeded or not.
        data.position(positionBefore);
        data.limit(limitBefore);
    }
}
项目:aapt    文件:Mutf8.java   
/**
 * Computes how many bytes the characters of {@code s} occupy in modified UTF-8.
 *
 * @param s the string to measure
 * @param shortLength when {@code true}, fail as soon as the running total exceeds 65535
 * @return the number of bytes needed for the characters of {@code s}
 * @throws UTFDataFormatException if {@code shortLength} is set and the total exceeds 65535
 */
private static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
    long total = 0;
    for (int index = 0, end = s.length(); index < end; index++) {
        final char c = s.charAt(index);
        final int width;
        if (c == 0 || c > 127) {
            // U+0000 is encoded with two bytes; anything above 2047 needs three.
            width = (c <= 2047) ? 2 : 3;
        } else {
            width = 1;
        }
        total += width;
        if (shortLength && total > 65535) {
            throw new UTFDataFormatException("String more than 65535 UTF bytes long");
        }
    }
    return total;
}
项目:JCL    文件:Mutf8.java   
/**
 * Counts the bytes that the characters of {@code s} take up in modified UTF-8.
 *
 * @param s the string to measure
 * @param shortLength if {@code true}, abort once the running total goes above 65535
 * @return the byte count for the characters of {@code s}
 * @throws UTFDataFormatException if {@code shortLength} is set and the total exceeds 65535
 */
private static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
    long byteCount = 0;
    final int n = s.length();
    int pos = 0;
    while (pos < n) {
        final char current = s.charAt(pos++);
        if (current > 2047) {
            byteCount += 3;
        } else if (current > 127 || current == 0) {
            // Two-byte range; U+0000 is encoded with two bytes as well.
            byteCount += 2;
        } else {
            byteCount += 1;
        }
        if (shortLength && byteCount > 65535) {
            throw new UTFDataFormatException("String more than 65535 UTF bytes long");
        }
    }
    return byteCount;
}
项目:JCL    文件:DexBuffer.java   
/**
 * Reads an offset with {@code readInt()}, moves {@code position} there, decodes the string
 * with {@code Mutf8.decode}, and finally restores {@code position} to where it was after
 * the offset was read.
 *
 * @return the decoded string
 * @throws DexException if the decoded length differs from the declared length, or a
 *         {@link UTFDataFormatException} is raised while decoding
 */
public String readString() {
    final int stringOffset = readInt();
    final int resumeAt = position;
    position = stringOffset;
    try {
        final int declaredLength = readUleb128();
        final String decoded = Mutf8.decode(this, new char[declaredLength]);
        final int actualLength = decoded.length();
        if (actualLength == declaredLength) {
            return decoded;
        }
        throw new DexException("Declared length " + declaredLength
                + " doesn't match decoded length of " + actualLength);
    } catch (UTFDataFormatException e) {
        throw new DexException(e);
    } finally {
        position = resumeAt;
    }
}
项目:beam    文件:InstantCoder.java   
/**
 * Reads one {@code long} from {@code inStream} and turns it into an {@link Instant}.
 *
 * @throws CoderException if the read fails with an {@link EOFException} or a
 *         {@link UTFDataFormatException}
 * @throws IOException for any other I/O failure
 */
@Override
public Instant decode(InputStream inStream) throws CoderException, IOException {
  final DataInputStream dataIn = new DataInputStream(inStream);
  final long encoded;
  try {
    encoded = dataIn.readLong();
  } catch (EOFException | UTFDataFormatException exn) {
    // Both of these mean the input could not be decoded, so re-brand them
    // as a coder-level failure.
    throw new CoderException(exn);
  }

  // The stored value is millis-since-epoch shifted so that the bytes of negative
  // values sort lexicographically before the bytes of positive values.
  // Adding Long.MIN_VALUE undoes the shift, deliberately relying on the
  // well-defined overflow of long arithmetic.
  // See http://docs.oracle.com/javase/specs/jls/se7/html/jls-15.html#jls-15.18.2
  return new Instant(encoded + Long.MIN_VALUE);
}
项目:beam    文件:StringUtf8Coder.java   
/**
 * Decodes a string from {@code inStream}.
 *
 * <p>When {@code context.isWholeStream} is set, all bytes returned by
 * {@code StreamUtils.getBytes} are interpreted as UTF-8; otherwise the string is read
 * through {@code readString}.
 *
 * @throws CoderException if {@code readString} fails with an {@link EOFException} or a
 *         {@link UTFDataFormatException}
 * @throws IOException for any other I/O failure
 */
@Override
public String decode(InputStream inStream, Context context)
    throws IOException {
  if (!context.isWholeStream) {
    try {
      return readString(new DataInputStream(inStream));
    } catch (EOFException | UTFDataFormatException exn) {
      // Both of these mean the input could not be decoded, so re-brand them
      // as a coder-level failure.
      throw new CoderException(exn);
    }
  }
  final byte[] wholeStream = StreamUtils.getBytes(inStream);
  return new String(wholeStream, StandardCharsets.UTF_8);
}
项目:beam    文件:ByteCoder.java   
/**
 * Reads a single byte from {@code inStream}.
 *
 * @return the byte that was read
 * @throws CoderException if the stream is already at end-of-file
 * @throws IOException for any other I/O failure
 */
@Override
public Byte decode(InputStream inStream)
    throws IOException, CoderException {
  try {
    // read() yields 0-255 for a byte and -1 once the stream is exhausted.
    final int unsignedByte = inStream.read();
    if (unsignedByte != -1) {
      return (byte) unsignedByte;
    }
    throw new EOFException("EOF encountered decoding 1 byte from input stream");
  } catch (EOFException | UTFDataFormatException exn) {
    // Both of these mean the input could not be decoded, so re-brand them
    // as a coder-level failure.
    throw new CoderException(exn);
  }
}
项目:dex    文件:Mutf8.java   
/**
 * Computes how many bytes the characters of {@code s} occupy in modified UTF-8.
 *
 * @param s the string to measure
 * @param shortLength when {@code true}, fail as soon as the running total exceeds 65535
 * @return the number of bytes needed for the characters of {@code s}
 * @throws UTFDataFormatException if {@code shortLength} is set and the total exceeds 65535
 */
private static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
    long total = 0;
    for (int index = 0, end = s.length(); index < end; index++) {
        final char c = s.charAt(index);
        final int width;
        if (c == 0 || c > 127) {
            // U+0000 is encoded with two bytes; anything above 2047 needs three.
            width = (c <= 2047) ? 2 : 3;
        } else {
            width = 1;
        }
        total += width;
        if (shortLength && total > 65535) {
            throw new UTFDataFormatException("String more than 65535 UTF bytes long");
        }
    }
    return total;
}
项目:dex    文件:Dex.java   
/**
 * Reads an offset via {@code readInt()}, jumps {@code data} to it, decodes the string found
 * there with {@code Mutf8.decode}, and then restores the previous position and limit.
 *
 * @return the decoded string
 * @throws DexException if the decoded length does not match the declared length, or a
 *         {@link UTFDataFormatException} is raised while decoding
 */
public String readString() {
    final int target = readInt();
    final int restorePosition = data.position();
    final int restoreLimit = data.limit();
    data.position(target);
    data.limit(data.capacity());
    try {
        final int expected = readUleb128();
        final char[] scratch = new char[expected];
        final String text = Mutf8.decode(this, scratch);
        if (text.length() != expected) {
            final String problem = "Declared length " + expected
                    + " doesn't match decoded length of " + text.length();
            throw new DexException(problem);
        }
        return text;
    } catch (UTFDataFormatException e) {
        throw new DexException(e);
    } finally {
        data.position(restorePosition);
        data.limit(restoreLimit);
    }
}
项目:dex    文件:Mutf8.java   
/**
 * Counts the bytes that the characters of {@code s} take up in modified UTF-8.
 *
 * @param s the string to measure
 * @param shortLength if {@code true}, abort once the running total goes above 65535
 * @return the byte count for the characters of {@code s}
 * @throws UTFDataFormatException if {@code shortLength} is set and the total exceeds 65535
 */
private static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
    long byteCount = 0;
    final int n = s.length();
    int pos = 0;
    while (pos < n) {
        final char current = s.charAt(pos++);
        if (current > 2047) {
            byteCount += 3;
        } else if (current > 127 || current == 0) {
            // Two-byte range; U+0000 is encoded with two bytes as well.
            byteCount += 2;
        } else {
            byteCount += 1;
        }
        if (shortLength && byteCount > 65535) {
            throw new UTFDataFormatException("String more than 65535 UTF bytes long");
        }
    }
    return byteCount;
}
项目:multidex-maker    文件:Dex.java   
/**
 * Reads an offset with {@code readInt()}, decodes the string stored at that offset via
 * {@code Mutf8.decode}, and restores the position and limit of {@code data} afterwards.
 *
 * @return the decoded string
 * @throws DexException if the decoded length differs from the declared length, or the
 *         bytes are rejected with a {@link UTFDataFormatException}
 */
public String readString() {
    final int stringOffset = readInt();
    final int positionBefore = data.position();
    final int limitBefore = data.limit();
    data.position(stringOffset);
    data.limit(data.capacity());
    try {
        final int declaredLength = readUleb128();
        final String decoded = Mutf8.decode(this, new char[declaredLength]);
        final int actualLength = decoded.length();
        if (actualLength == declaredLength) {
            return decoded;
        }
        throw new DexException("Declared length " + declaredLength
                + " doesn't match decoded length of " + actualLength);
    } catch (UTFDataFormatException e) {
        throw new DexException(e);
    } finally {
        // Always put the cursor back, whether decoding succeeded or not.
        data.position(positionBefore);
        data.limit(limitBefore);
    }
}
项目:multidex-maker    文件:Mutf8.java   
/**
 * Computes how many bytes the characters of {@code s} occupy in modified UTF-8.
 *
 * @param s the string to measure
 * @param shortLength when {@code true}, fail as soon as the running total exceeds 65535
 * @return the number of bytes needed for the characters of {@code s}
 * @throws UTFDataFormatException if {@code shortLength} is set and the total exceeds 65535
 */
private static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
    long total = 0;
    for (int index = 0, end = s.length(); index < end; index++) {
        final char c = s.charAt(index);
        final int width;
        if (c == 0 || c > 127) {
            // U+0000 is encoded with two bytes; anything above 2047 needs three.
            width = (c <= 2047) ? 2 : 3;
        } else {
            width = 1;
        }
        total += width;
        if (shortLength && total > 65535) {
            throw new UTFDataFormatException("String more than 65535 UTF bytes long");
        }
    }
    return total;
}
项目:gemfirexd-oss    文件:UTF8UtilTest.java   
/**
 * Checks that <code>skipFully</code> fails with an exception when it meets
 * a UTF-8 encoding error in the stream.
 *
 * @throws IOException if the test fails for some unexpected reason
 */
public void testSkipFullyOnInvalidStreamCJK()
        throws IOException {
    final int length = 10;
    final LoopingAlphabetReader source =
            new LoopingAlphabetReader(length, CharAlphabet.cjkSubset());
    final InputStream encoded =
            new ReaderToUTF8Stream(source, length, 0, "ignored-test-type");
    encoded.skip(2L); // Drop the encoded length that ReaderToUTF8Stream adds.
    encoded.skip(1L); // Drop one more byte so the stream starts mid-character.
    try {
        UTF8Util.skipFully(encoded, length);
        fail("Should have failed because of UTF error.");
    } catch (UTFDataFormatException expected) {
        // Expected outcome; nothing further to check.
    }
}
项目:gemfirexd-oss    文件:UTF8UtilTest.java   
/**
 * Shows that skipping over a badly encoded character sequence succeeds,
 * because skipping does not check the stream for well-formedness, while
 * decoding the same bytes with {@code readUTF} fails.
 */
public void testSkippingInvalidEncodingWorks()
        throws IOException {
    // Three valid characters plus one three-byte sequence that is missing its
    // last byte (0xef = 11101111, 0xb8 = 10111000). Skipping (incorrectly)
    // treats this as three characters: 'a' - some three byte character - 'a'.
    final byte[] payload = {'a', (byte)0xef, (byte)0xb8, 'a', 'a'};
    final byte[] payloadWithLength =
        {0x0, 0x5, 'a', (byte)0xef, (byte)0xb8, 'a', 'a'};

    // Not actually correct behaviour, but it works at present.
    final InputStream skipSource = new ByteArrayInputStream(payload);
    UTF8Util.skipFully(skipSource, 3);

    // Decoding the same bytes must be rejected.
    final ByteArrayInputStream decodeSource = new ByteArrayInputStream(payloadWithLength);
    final DataInputStream decoder = new DataInputStream(decodeSource);
    try {
        decoder.readUTF();
        fail("UTF-8 expected to be invalid, read should fail");
    } catch (UTFDataFormatException expected) {
        // Expected: the UTF-8 encoding is invalid.
    }
}
项目:TayzGrid    文件:BlockDataOutputStream.java   
/**
     * Writes the given string in UTF format when the caller already knows the
     * string's UTF encoding length; passing it in avoids a prescan of the
     * string to work that length out.
     *
     * @param s the string to write
     * @param utflen the UTF encoding length of {@code s}
     * @throws UTFDataFormatException if {@code utflen} is greater than 0xFFFF
     */
    void writeUTF(String s, int utflen) throws IOException
    {
        if (utflen > 0xFFFFL)
        {
            throw new UTFDataFormatException();
        }

        // The length prefix is emitted with writeInt.
        writeInt(utflen);

        if (utflen != (long) s.length())
        {
            writeUTFBody(s);
        }
        else
        {
            // Encoded length equals the char count, so the string is written with writeBytes.
            writeBytes(s);
        }
    }
项目:j2objc    文件:ModifiedUtf8.java   
/**
 * Counts the bytes needed for the modified UTF-8 form of {@code s}. Only the bytes for
 * the characters are counted, not the length that precedes them, since callers store that
 * length in different widths (two, four, or even eight bytes). With {@code shortLength}
 * set, an exception is thrown if the string is too long for its length to fit in a short.
 *
 * @param s the string to measure
 * @param shortLength whether to enforce the 65535-byte limit
 * @return the number of bytes for the characters of {@code s}
 * @throws UTFDataFormatException if {@code shortLength} is set and the total exceeds 65535
 */
public static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
    long byteCount = 0;
    final int n = s.length();
    int pos = 0;
    while (pos < n) {
        final char current = s.charAt(pos++);
        if (current > 2047) {
            byteCount += 3;
        } else if (current > 127 || current == 0) {
            // Two-byte range; U+0000 is encoded with two bytes as well.
            byteCount += 2;
        } else {
            byteCount += 1;
        }
        if (shortLength && byteCount > 65535) {
            throw new UTFDataFormatException("String more than 65535 UTF bytes long");
        }
    }
    return byteCount;
}
项目:systemml    文件:CacheDataOutput.java   
/**
 * Writes {@code s} as a 2-byte length followed by its encoded bytes.
 *
 * <p>The payload length is taken as {@code IOUtilFunctions.getUTFSize(s) - 2} and is the
 * value handed to {@code writeShort}. The guard therefore checks that same value: any
 * payload above 65535 bytes is rejected so the 16-bit length cannot be silently truncated.
 *
 * @param s the string to write
 * @throws UTFDataFormatException if the encoded payload is longer than 65535 bytes
 * @throws IOException if writing fails
 */
@Override
public void writeUTF(String s) throws IOException {
    int slen = s.length();
    int utflen = IOUtilFunctions.getUTFSize(s) - 2;
    // utflen is exactly what writeShort receives below, so it must fit in 16 bits.
    if (utflen > 65535)
        throw new UTFDataFormatException("encoded string too long: "+utflen);

    //write utf len (2 bytes) 
    writeShort(utflen);

    //write utf payload
    for( int i=0; i<slen; i++ ) {
        char c = s.charAt(i);
        if( c>= 0x0001 && c<=0x007F ) //1 byte range
            writeByte(c);
        else if( c>=0x0800 ) { //3 byte range
            // NOTE(review): these branches write straight into _buff at _count; no
            // capacity check is visible here -- confirm the buffer is sized by the caller.
            _buff[_count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
            _buff[_count++] = (byte) (0x80 | ((c >>  6) & 0x3F));
            _buff[_count++] = (byte) (0x80 | ((c >>  0) & 0x3F));
        }
        else { //2 byte range and null
            _buff[_count++] = (byte) (0xC0 | ((c >>  6) & 0x1F));
            _buff[_count++] = (byte) (0x80 | ((c >>  0) & 0x3F));
        }
    }
}
项目:filestacker    文件:TextStackerTest.java   
/**
 * Exercises several UTF-8 string cases: strings with characters of 1, 2 and 3 bytes.
 * Each sample must survive a {@code toBytes} / {@code toStr} round trip unchanged.
 */
@Test
public final void testStrBytesConversions() throws UTFDataFormatException {
    final String[] samples = {
        "pàpêpípõpü",
        "\t \n \r",
        "",
        " ",
        "ЊДОШПЦФ",
        "ดตญทธยษส",
        "ヅテガシジツミポブ",
        "สçヅยãテОガ;ธ§Д"
    };

    for (int i = 0; i < samples.length; i++) {
        final String original = samples[i];
        assertEquals(original, TextStacker.toStr(TextStacker.toBytes(original)));
    }
}
项目:asakusafw-compiler    文件:DataIoUtils.java   
/**
 * Writes a string in the same layout as {@link DataOutput#writeUTF(String)}, but without
 * calling that method: a short holding the body size, then the encoded characters.
 * @param output the target {@link DataOutput}
 * @param value the target value
 * @throws UTFDataFormatException if the body size does not fit in 16 bits
 * @throws IOException if failed to write String into {@link DataOutput}
 */
public static void writeUTF(DataOutput output, String value) throws IOException {
    final int bodySize = computeUtfBodySize(value);
    // Any bit set above the low 16 means the size cannot be stored in a short.
    if ((bodySize >>> Short.SIZE) != 0) {
        throw new UTFDataFormatException("too long UTF string");
    }
    output.writeShort(bodySize);

    final int length = value.length();
    int index = 0;
    while (index < length) {
        final char ch = value.charAt(index);
        index++;
        if (ch != CHAR_ZERO && ch <= CHAR_MAX1) {
            // single-byte form
            output.write(ch);
        } else if (ch <= CHAR_MAX2) {
            // two-byte form
            output.write(MASK_HEAD2 | ((ch >> 6) & MASK_BODY5));
            output.write(MASK_HEAD1 | (ch & MASK_BODY6));
        } else {
            // three-byte form
            output.write(MASK_HEAD3 | ((ch >> 12) & MASK_BODY4));
            output.write(MASK_HEAD1 | ((ch >>  6) & MASK_BODY6));
            output.write(MASK_HEAD1 | (ch & MASK_BODY6));
        }
    }
}
项目:In-the-Box-Fork    文件:UTFDataFormatExceptionTest.java   
/**
 * @tests java.io.UTFDataFormatException#UTFDataFormatException()
 */
@TestTargetNew(
        level = TestLevel.COMPLETE,
        method = "UTFDataFormatException",
        args = {}
    )
public void test_Constructor() {
    try {
        // The always-true guard keeps the compiler from rejecting fail() as unreachable.
        if (true) {
            throw new UTFDataFormatException();
        }
        fail("Test 1: UTFDataFormatException expected.");
    } catch (UTFDataFormatException thrown) {
        final String message = thrown.getMessage();
        assertNull("Test 2: Null expected for exceptions constructed without a message.",
                message);
    }
}