public static String byteBuf2String(ByteBuf buf, Charset charset) throws UTFDataFormatException, IndexOutOfBoundsException, CharacterCodingException { int byteLen = buf.readableBytes(); if (charset.equals(StandardCharsets.US_ASCII)) { return Utf8Reader.readUtf8(buf, byteLen); } else if (charset.equals(StandardCharsets.UTF_8)) { try { return Utf8Reader.readUtf8(buf.duplicate(), (int) (byteLen * 1.4)); } catch (IndexOutOfBoundsException e) { // try again with 3 bytes per char return Utf8Reader.readUtf8(buf, byteLen * 3); } } else { return byteBuffersToString(buf.nioBuffers(), charset); } }
/** * Returns the number of bytes the modified UTF8 representation of 's' would take. */ public static long countBytes(String s, boolean shortLength) throws UTFDataFormatException { long result = 0; final int length = s.length(); for (int i = 0; i < length; ++i) { char ch = s.charAt(i); if (ch != 0 && ch <= 127) { // U+0000 uses two bytes. ++result; } else if (ch <= 2047) { result += 2; } else { result += 3; } if (shortLength && result > 65535) { throw new UTFDataFormatException("String more than 65535 UTF bytes long"); } } return result; }
public void writeUTF(String str) throws IOException { int len = str.length(); if (len > 0xffff) { throw new UTFDataFormatException(); } int bytecount = StringConverter.getUTFSize(str); if (bytecount > 0xffff) { throw new UTFDataFormatException(); } // writeChar(bytecount); HsqlByteArrayOutputStream bao = new HsqlByteArrayOutputStream(bytecount); StringConverter.stringToUTFBytes(str, bao); this.write(bao.getBuffer(), 0, bao.size()); }
/**
 * Reads a string reference: an int offset is read from the current position, the string at
 * that offset is decoded, and the buffer's position and limit are restored afterwards.
 *
 * @return the decoded string
 * @throws DexException if the decoded length differs from the declared length or the bytes
 *                      are not valid modified UTF-8
 */
public String readString() {
    final int stringOffset = readInt();
    final int positionBefore = data.position();
    final int limitBefore = data.limit();

    // Jump to the string data and open the window up to the buffer's capacity.
    data.position(stringOffset);
    data.limit(data.capacity());
    try {
        final int declaredLength = readUleb128();
        final String decoded = Mutf8.decode(this, new char[declaredLength]);
        if (decoded.length() == declaredLength) {
            return decoded;
        }
        throw new DexException("Declared length " + declaredLength
                + " doesn't match decoded length of " + decoded.length());
    } catch (UTFDataFormatException malformed) {
        throw new DexException(malformed);
    } finally {
        // Always put the cursor back, whether decoding succeeded or not.
        data.position(positionBefore);
        data.limit(limitBefore);
    }
}
/** * Returns the number of bytes the modified UTF8 representation of 's' would take. */ private static long countBytes(String s, boolean shortLength) throws UTFDataFormatException { long result = 0; final int length = s.length(); for (int i = 0; i < length; ++i) { char ch = s.charAt(i); if (ch != 0 && ch <= 127) { // U+0000 uses two bytes. ++result; } else if (ch <= 2047) { result += 2; } else { result += 3; } if (shortLength && result > 65535) { throw new UTFDataFormatException("String more than 65535 UTF bytes long"); } } return result; }
/**
 * Follows an int offset read from the current position, decodes the string stored there and
 * then restores the buffer's original position and limit.
 *
 * @return the decoded string
 * @throws DexException2 if the declared and decoded lengths disagree, or if the data is not
 *                       well-formed modified UTF-8
 */
public String readString() {
    final int target = readInt();
    final int originalPosition = data.position();
    final int originalLimit = data.limit();

    data.position(target);
    data.limit(data.capacity());
    try {
        final int declared = readUleb128();
        final char[] scratch = new char[declared];
        final String text = Mutf8.decode(this, scratch);
        final boolean lengthMatches = text.length() == declared;
        if (!lengthMatches) {
            throw new DexException2("Declared length " + declared
                    + " doesn't match decoded length of " + text.length());
        }
        return text;
    } catch (UTFDataFormatException badEncoding) {
        throw new DexException2(badEncoding);
    } finally {
        // Restore the cursor on every exit path.
        data.position(originalPosition);
        data.limit(originalLimit);
    }
}
/** * Used when we know the max size will fit in the current buffer. */ private final void writeQuickFullUTF(String str, int strlen) throws IOException { int utfSizeIdx = this.buffer.position(); // skip bytes reserved for length this.buffer.position(utfSizeIdx + 2); for (int i = 0; i < strlen; i++) { int c = str.charAt(i); if ((c >= 0x0001) && (c <= 0x007F)) { this.buffer.put((byte) c); } else if (c > 0x07FF) { this.buffer.put((byte) (0xE0 | ((c >> 12) & 0x0F))); this.buffer.put((byte) (0x80 | ((c >> 6) & 0x3F))); this.buffer.put((byte) (0x80 | ((c >> 0) & 0x3F))); } else { this.buffer.put((byte) (0xC0 | ((c >> 6) & 0x1F))); this.buffer.put((byte) (0x80 | ((c >> 0) & 0x3F))); } } int utflen = this.buffer.position() - (utfSizeIdx + 2); if (utflen > 65535) { // act as if we wrote nothing to this buffer this.buffer.position(utfSizeIdx); throw new UTFDataFormatException(); } this.buffer.putShort(utfSizeIdx, (short) utflen); }
public static long countBytes(String s, boolean shortLength) throws UTFDataFormatException { long result = 0; int length = s.length(); int i = 0; while (i < length) { char ch = s.charAt(i); if (ch != '\u0000' && ch <= '') { result++; } else if (ch <= '߿') { result += 2; } else { result += 3; } if (!shortLength || result <= 65535) { i++; } else { throw new UTFDataFormatException("String more than 65535 UTF bytes long"); } } return result; }
/** * Tests that <code>skipFully</code> throws exception if there is a UTF-8 * encoding error in the stream * * @throws IOException if the test fails for some unexpected reason */ public void testSkipFullyOnInvalidStreamCJK() throws IOException { final int charLength = 10; InputStream in = new ReaderToUTF8Stream( new LoopingAlphabetReader(charLength, CharAlphabet.cjkSubset()), charLength, 0, "ignored-test-type"); in.skip(2L); // Skip encoded length added by ReaderToUTF8Stream. in.skip(1L); // Skip one more byte to trigger a UTF error. try { UTF8Util.skipFully(in, charLength); fail("Should have failed because of UTF error."); } catch (UTFDataFormatException udfe) { // As expected, do nothing. } }
/** * Demonstrates that skipping incorrectly encoded character sequences * works because the stream is not checked for well-formedness. */ public void testSkippingInvalidEncodingWorks() throws IOException { // The array contains three valid characters and one invalid three-byte // representation that only has two bytes present. // When skipping, this sequence is (incorrectly) taken as a sequence of // three characters ('a' - some three byte character - 'a'). // 0xef = 11101111, 0xb8 = 10111000 byte[] data = {'a', (byte)0xef, (byte)0xb8, 'a', 'a'}; byte[] dataWithLength = {0x0, 0x5, 'a', (byte)0xef, (byte)0xb8, 'a', 'a'}; InputStream is = new ByteArrayInputStream(data); // This is actually incorrect, but does work currently. UTF8Util.skipFully(is, 3); // Verify that decoding this actually fails. DataInputStream dis = new DataInputStream( new ByteArrayInputStream(dataWithLength)); try { dis.readUTF(); fail("UTF-8 expected to be invalid, read should fail"); } catch (UTFDataFormatException udfe) { // This is expected, since the UTF-8 encoding is invalid } }
@Override public void writeUTF(String str) throws IOException { int utfCount = 0, length = str.length(); for (int i = 0; i < length; i++) { int charValue = str.charAt(i); if (charValue > 0 && charValue <= 127) { utfCount++; } else if (charValue <= 2047) { utfCount += 2; } else { utfCount += 3; } } if (utfCount > 65535) { throw new UTFDataFormatException(); //$NON-NLS-1$ } position += utfCount * 2; }
/**
 * Reads an int offset at the current position, decodes the string located at that offset and
 * restores {@code position} to where it was after the offset had been read.
 *
 * @return the decoded string
 * @throws DexException if the decoded length differs from the declared length or the bytes
 *                      are not valid modified UTF-8
 */
public String readString() {
    final int stringOffset = readInt();
    final int resumeAt = position;
    position = stringOffset;
    try {
        final int declaredLength = readUleb128();
        final String decoded = Mutf8.decode(this, new char[declaredLength]);
        if (decoded.length() == declaredLength) {
            return decoded;
        }
        throw new DexException("Declared length " + declaredLength
                + " doesn't match decoded length of " + decoded.length());
    } catch (UTFDataFormatException malformed) {
        throw new DexException(malformed);
    } finally {
        // Resume reading right after the offset, regardless of the outcome.
        position = resumeAt;
    }
}
@Override public Instant decode(InputStream inStream) throws CoderException, IOException { long shiftedMillis; try { shiftedMillis = new DataInputStream(inStream).readLong(); } catch (EOFException | UTFDataFormatException exn) { // These exceptions correspond to decoding problems, so change // what kind of exception they're branded as. throw new CoderException(exn); } // Produces an {@link Instant} from a {@code long} representing its millis-since-epoch, // but shifted so that the byte representation of negative values are lexicographically // ordered before the byte representation of positive values. // // This deliberately utilizes the well-defined overflow for {@code long} values. // See http://docs.oracle.com/javase/specs/jls/se7/html/jls-15.html#jls-15.18.2 return new Instant(shiftedMillis + Long.MIN_VALUE); }
@Override public String decode(InputStream inStream, Context context) throws IOException { if (context.isWholeStream) { byte[] bytes = StreamUtils.getBytes(inStream); return new String(bytes, StandardCharsets.UTF_8); } else { try { return readString(new DataInputStream(inStream)); } catch (EOFException | UTFDataFormatException exn) { // These exceptions correspond to decoding problems, so change // what kind of exception they're branded as. throw new CoderException(exn); } } }
@Override public Byte decode(InputStream inStream) throws IOException, CoderException { try { // value will be between 0-255, -1 for EOF int value = inStream.read(); if (value == -1) { throw new EOFException("EOF encountered decoding 1 byte from input stream"); } return (byte) value; } catch (EOFException | UTFDataFormatException exn) { // These exceptions correspond to decoding problems, so change // what kind of exception they're branded as. throw new CoderException(exn); } }
/** * Writes the given string in UTF format. This method is used in * situations where the UTF encoding length of the string is already * known; specifying it explicitly avoids a prescan of the string to * determine its UTF length. */ void writeUTF(String s, int utflen) throws IOException { if (utflen > 0xFFFFL) { throw new UTFDataFormatException(); } writeInt(utflen); // write7BitEncodedInt(utflen); //writeShort((int) utflen); if (utflen == (long) s.length()) { writeBytes(s); } else { writeUTFBody(s); } }
/** * Returns the number of bytes the modified UTF-8 representation of 's' would take. Note * that this is just the space for the bytes representing the characters, not the length * which precedes those bytes, because different callers represent the length differently, * as two, four, or even eight bytes. If {@code shortLength} is true, we'll throw an * exception if the string is too long for its length to be represented by a short. */ public static long countBytes(String s, boolean shortLength) throws UTFDataFormatException { long result = 0; final int length = s.length(); for (int i = 0; i < length; ++i) { char ch = s.charAt(i); if (ch != 0 && ch <= 127) { // U+0000 uses two bytes. ++result; } else if (ch <= 2047) { result += 2; } else { result += 3; } if (shortLength && result > 65535) { throw new UTFDataFormatException("String more than 65535 UTF bytes long"); } } return result; }
@Override public void writeUTF(String s) throws IOException { int slen = s.length(); int utflen = IOUtilFunctions.getUTFSize(s) - 2; if (utflen-2 > 65535) throw new UTFDataFormatException("encoded string too long: "+utflen); //write utf len (2 bytes) writeShort(utflen); //write utf payload for( int i=0; i<slen; i++ ) { char c = s.charAt(i); if( c>= 0x0001 && c<=0x007F ) //1 byte range writeByte(c); else if( c>=0x0800 ) { //3 byte range _buff[_count++] = (byte) (0xE0 | ((c >> 12) & 0x0F)); _buff[_count++] = (byte) (0x80 | ((c >> 6) & 0x3F)); _buff[_count++] = (byte) (0x80 | ((c >> 0) & 0x3F)); } else { //2 byte range and null _buff[_count++] = (byte) (0xC0 | ((c >> 6) & 0x1F)); _buff[_count++] = (byte) (0x80 | ((c >> 0) & 0x3F)); } } }
/**
 * Tests several UTF-8 string cases: with characters of 1, 2 and 3 bytes. Each sample is
 * converted to bytes and back, and must come out unchanged.
 */
@Test
public final void testStrBytesConversions() throws UTFDataFormatException {
    final String[] samples = {
        "pàpêpípõpü",
        "\t \n \r",
        "",
        " ",
        "ЊДОШПЦФ",
        "ดตญทธยษส",
        "ヅテガシジツミポブ",
        "สçヅยãテОガ;ธ§Д"
    };
    for (int idx = 0; idx < samples.length; idx++) {
        final String sample = samples[idx];
        assertEquals(sample, TextStacker.toStr(TextStacker.toBytes(sample)));
    }
}
/**
 * Emulates {@link DataOutput#writeUTF(String)} without calling that method: writes the body
 * size as a short, then encodes each character by hand.
 *
 * @param output the target {@link DataOutput}
 * @param value the target value
 * @throws UTFDataFormatException if the body size does not fit into 16 bits
 * @throws IOException if failed to write String into {@link DataOutput}
 */
public static void writeUTF(DataOutput output, String value) throws IOException {
    final int bodySize = computeUtfBodySize(value);
    // Any bit set above the low 16 means the size cannot be stored in the short prefix.
    if ((bodySize >>> Short.SIZE) != 0) {
        throw new UTFDataFormatException("too long UTF string");
    }
    output.writeShort(bodySize);

    final int charCount = value.length();
    for (int index = 0; index < charCount; index++) {
        final char c = value.charAt(index);
        if (c == CHAR_ZERO || c > CHAR_MAX1) {
            if (c <= CHAR_MAX2) {
                // two-byte form
                output.write(MASK_HEAD2 | ((c >> 6) & MASK_BODY5));
                output.write(MASK_HEAD1 | (c & MASK_BODY6));
            } else {
                // three-byte form
                output.write(MASK_HEAD3 | ((c >> 12) & MASK_BODY4));
                output.write(MASK_HEAD1 | ((c >> 6) & MASK_BODY6));
                output.write(MASK_HEAD1 | (c & MASK_BODY6));
            }
        } else {
            // single-byte form
            output.write(c);
        }
    }
}
/** * @tests java.io.UTFDataFormatException#UTFDataFormatException() */ @TestTargetNew( level = TestLevel.COMPLETE, method = "UTFDataFormatException", args = {} ) public void test_Constructor() { try { if (true) // To avoid unreachable code compilation error. throw new UTFDataFormatException(); fail("Test 1: UTFDataFormatException expected."); } catch (UTFDataFormatException e) { assertNull("Test 2: Null expected for exceptions constructed without a message.", e.getMessage()); } }