/** * Performs IDN ToASCII encoding and canonicalize the result to lowercase. e.g. This converts * {@code ☃.net} to {@code xn--n3h.net}, and {@code WwW.GoOgLe.cOm} to {@code www.google.com}. * {@code null} will be returned if the input cannot be ToASCII encoded or if the result * contains unsupported ASCII characters. */ public static String domainToAscii(String input) { try { String result = IDN.toASCII(input).toLowerCase(Locale.US); if (result.isEmpty()) return null; // Confirm that the IDN ToASCII result doesn't contain any illegal characters. if (containsInvalidHostnameAsciiCodes(result)) { return null; } // TODO: implement all label limits. return result; } catch (IllegalArgumentException e) { return null; } }
/** * Validates the given UTF-8 domain. This method is IDN-aware and tests the nameprep'd * equivalent. This splits on the full stop (.), ensures that the total length of the domain * including the implicit label ("") length octet does not exceed the maximum of 255 bytes. Each * label is tested using the validateLabel method. * * @param domain * The domain to validate * @return null if no errors, a string describing the problem otherwise */ public static String validate(String domain) { String errorMsg = null; String[] labels = domain.split("\\."); // The count of size octets, including the implicit trailing size octet per RFC 1034 int totalLength = labels.length + 1; for (String label : labels) { String asciiLabel = IDN.toASCII(label); totalLength += asciiLabel.length(); if (totalLength > MAX_DOMAIN_LENGTH) { errorMsg = "Invalid domain name, \"" + domain + "\" is too long"; break; } errorMsg = validateLabel(label); if (errorMsg != null) { errorMsg = "Invalid domain name, " + errorMsg; break; } } return errorMsg; }
private String toAscii(String unicodeString) throws IllegalArgumentException { StringBuilder asciiString = new StringBuilder(); int start = 0; int end = unicodeString.length() <= 63 ? unicodeString.length() : 63; while (true) { // IDN.toASCII only supports a max "label" length of 63 characters. Need to chunk the input in these sizes asciiString.append(IDN.toASCII(unicodeString.substring(start, end))); if (end == unicodeString.length()) { break; } start = end; end = start + 63 > unicodeString.length() ? unicodeString.length() : start + 63; } return asciiString.toString(); }
/**
 * Builds a {@code Dns} value from a host name after checking it character by character.
 *
 * <p>Every character must be accepted by {@code DNS.matches}; a dot may neither be the first
 * character nor directly follow another dot. The accepted name is lower-cased with
 * {@link Locale#US} and paired with its {@link IDN#toUnicode(String)} form.
 *
 * @param hostname the host name to parse
 * @return the value holding the lower-cased name and its Unicode form
 * @throws InvalidHostException at the index of the first offending character
 */
static Dns parse(String hostname) {
  int previousDot = -1;
  final int length = hostname.length();
  for (int index = 0; index < length; index++) {
    char current = hostname.charAt(index);
    if (!DNS.matches(current)) {
      throw new InvalidHostException(hostname, index);
    }
    if (current != '.') {
      continue;
    }
    // previousDot starts at -1, so a dot at index 0 is rejected like a doubled dot.
    if (previousDot == index - 1) {
      throw new InvalidHostException(hostname, index);
    }
    previousDot = index;
  }
  String lowerCased = hostname.toLowerCase(Locale.US);
  String unicodeForm = IDN.toUnicode(lowerCased);
  return new AutoValue_Dns(lowerCased, unicodeForm);
}
/**
 * Reads a dot-separated name from the stream, one length-prefixed label at a time.
 *
 * <p>The first byte decides how to proceed: if its two high bits are set, it and the next byte
 * form a 14-bit offset into {@code data} and reading continues there via the
 * {@code readName(data, offset, jumps)} overload (this looks like DNS name compression,
 * RFC 1035 section 4.1.4); if it is zero, the name ends; otherwise it is the byte length of
 * the next label.
 *
 * @param dis the stream positioned at the start of a name
 * @param data the raw data used to resolve offsets
 * @return the name with labels converted by {@link IDN#toUnicode(String)} and joined by dots;
 *     the empty string for a lone terminator
 * @throws IOException if reading from the stream fails
 */
private static String readName(DataInputStream dis, byte[] data) throws IOException {
    int c = dis.readUnsignedByte();
    if ((c & 0xc0) == 0xc0) {
        c = ((c & 0x3f) << 8) + dis.readUnsignedByte();
        // Record the first offset; the overload receives the set of visited offsets.
        HashSet<Integer> jumps = new HashSet<Integer>();
        jumps.add(c);
        return readName(data, c, jumps);
    }
    if (c == 0) {
        return "";
    }
    byte[] b = new byte[c];
    dis.readFully(b);
    // Decode with an explicit charset: new String(byte[]) uses the platform default, which
    // makes the result machine-dependent. UTF-8 is a superset of the ASCII used by
    // ACE ("xn--") labels.
    String s = IDN.toUnicode(new String(b, java.nio.charset.StandardCharsets.UTF_8));
    String t = readName(dis, data);
    if (t.length() > 0) {
        return s + "." + t;
    }
    return s;
}
private String toAscii(String unicodeString) throws IllegalArgumentException { StringBuilder asciiString = new StringBuilder(256); int start = 0; int end = unicodeString.length() <= 63 ? unicodeString.length() : 63; while (true) { // IDN.toASCII only supports a max "label" length of 63 characters. Need to chunk the input in these sizes asciiString.append(IDN.toASCII(unicodeString.substring(start, end))); if (end == unicodeString.length()) { break; } start = end; end = start + 63 > unicodeString.length() ? unicodeString.length() : start + 63; } return asciiString.toString(); }
/** * Performs IDN ToASCII encoding and canonicalize the result to lowercase. e.g. This converts * {@code ☃.net} to {@code xn--n3h.net}, and {@code WwW.GoOgLe.cOm} to {@code www.google.com}. * {@code null} will be returned if the input cannot be ToASCII encoded or if the result * contains unsupported ASCII characters. */ public static String domainToAscii(String input) { try { String result = IDN.toASCII(input).toLowerCase(Locale.US); if (result.isEmpty()) { return null; } // Confirm that the IDN ToASCII result doesn't contain any illegal characters. if (containsInvalidHostnameAsciiCodes(result)) { return null; } // TODO: implement all label limits. return result; } catch (IllegalArgumentException e) { return null; } }
private static boolean matchDomain(String domain) { // if we have a trailing dot the domain part we have an invalid email address. // the regular expression match would take care of this, but IDN.toASCII drops the trailing '.' if (domain.endsWith(".")) { return false; } String asciiString; try { asciiString = IDN.toASCII(domain); } catch (IllegalArgumentException e) { return false; } if (asciiString.length() > MAX_DOMAIN_PART_LENGTH) { return false; } Matcher matcher = DOMAIN_PATTERN.matcher(asciiString); return matcher.matches(); }
/**
 * Derives a key from {@code name} that {@code has(...)} does not yet report as present.
 *
 * <p>The name is converted with {@link IDN#toASCII(String)}, upper-cased, and every
 * non-word character is replaced by {@code _}. While the candidate is reported as taken, a
 * random UUID suffix is appended and the candidate is re-checked.
 *
 * <p>Upper-casing uses {@code Locale.ROOT} so the key does not depend on the JVM's default
 * locale (under a Turkish locale {@code i} would otherwise map to a non-ASCII letter and be
 * sanitized to {@code _}).
 *
 * @param name the name to derive the key from
 * @return an upper-case key for which {@code has} returned {@code false}
 * @throws IllegalStateException if more than {@code MAX_UNIQUE_KEY_ATTEMPTS} candidates in a
 *     row were already taken
 */
@Override
public String generateUniqueKey(String name) throws IllegalStateException {
    String punyCode = IDN.toASCII(name).toUpperCase(java.util.Locale.ROOT);
    String sanitizedKey = punyCode.replaceAll("\\W", "_");
    // sanitizedKey is already upper-case, so it can be used as the first candidate directly.
    String resultKey = sanitizedKey;
    int counter = 0;
    while (this.has(resultKey)) {
        counter++;
        resultKey = (sanitizedKey + "." + UUID.randomUUID()).toUpperCase(java.util.Locale.ROOT);
        if (counter > MAX_UNIQUE_KEY_ATTEMPTS) {
            throw new IllegalStateException("Could not generate unique entity key");
        }
    }
    return resultKey;
}
/** * Performs IDN ToASCII encoding and canonicalize the result to lowercase. e.g. This converts * {@code ☃.net} to {@code xn--n3h.net}, and {@code WwW.GoOgLe.cOm} to {@code www.google.com}. * {@code null} will be returned if the input cannot be ToASCII encoded or if the result * contains unsupported ASCII characters. */ @Nullable private static String domainToAscii( String input ) { try { String result = IDN.toASCII( input ).toLowerCase( Locale.US ); if ( ( result == null ) || result.isEmpty() ) { return null; } // Confirm that the IDN ToASCII result doesn't contain any illegal characters. if ( containsInvalidHostnameAsciiCodes( result ) ) { return null; } // TODO: implement all label limits. return result; } catch ( IllegalArgumentException e ) { return null; } }
/**
 * Parse a domain name starting at the current offset and moving the input
 * stream pointer past this domain name (even if cross references occur).
 *
 * <p>A length byte with its two high bits set starts a cross reference: together with the
 * next byte it forms a 14-bit offset into {@code data}, which is resolved by the
 * {@code readName(data, offset, jumps)} overload. A zero length byte ends the name.
 *
 * @param dis The input stream.
 * @param data The raw data (for cross references).
 * @return The domain name string; labels are converted with {@link IDN#toUnicode(String)}
 *         and joined by dots.
 * @throws IOException If reading from the stream fails.
 */
private static String readName(DataInputStream dis, byte[] data) throws IOException {
    int c = dis.readUnsignedByte();
    if ((c & 0xc0) == 0xc0) {
        c = ((c & 0x3f) << 8) + dis.readUnsignedByte();
        // Seed the set of visited offsets with the first jump target.
        HashSet<Integer> jumps = new HashSet<Integer>();
        jumps.add(c);
        return readName(data, c, jumps);
    }
    if (c == 0) {
        return "";
    }
    byte[] b = new byte[c];
    dis.readFully(b);
    // Use an explicit charset: new String(byte[]) decodes with the platform default and
    // would make the parsed name machine-dependent. UTF-8 covers the ASCII of ACE labels.
    String s = IDN.toUnicode(new String(b, java.nio.charset.StandardCharsets.UTF_8));
    String t = readName(dis, data);
    if (t.length() > 0) {
        s = s + "." + t;
    }
    return s;
}
/**
 * Rewrites a URL so that its host is in IDN ToASCII form.
 *
 * <p>URLs whose host {@code isAscii} accepts, or whose host is unchanged by
 * {@link IDN#toASCII(String)}, are returned as given. Otherwise a new URL is assembled from
 * the protocol, converted host, port and file of the original.
 *
 * @param sourceUrl not used by this method
 * @param sourceMetadata not used by this method
 * @param urlToFilter the URL to rewrite
 * @return the possibly rewritten URL, or {@code null} if the URL is malformed or its host
 *     cannot be converted to ASCII
 */
@Override
public String filter(URL sourceUrl, Metadata sourceMetadata, String urlToFilter) {
    try {
        URL url = new URL(urlToFilter);
        String originalHost = url.getHost();
        if (isAscii(originalHost)) {
            return urlToFilter;
        }
        String asciiHost;
        try {
            asciiHost = IDN.toASCII(originalHost);
        } catch (IllegalArgumentException e) {
            // The host cannot be expressed as an IDN; treat it like a malformed URL rather
            // than letting the unchecked exception escape.
            return null;
        }
        if (asciiHost.equals(originalHost)) {
            return urlToFilter;
        }
        return new URL(url.getProtocol(), asciiHost, url.getPort(), url.getFile()).toString();
    } catch (MalformedURLException e) {
        return null;
    }
}
/**
 * Parse a domain name starting at the current offset and moving the input
 * stream pointer past this domain name (even if cross references occur).
 *
 * <p>A length byte whose two high bits are set marks a cross reference; combined with the
 * following byte it gives a 14-bit offset into {@code data} that is handed to the
 * {@code readName(data, offset, jumps)} overload. A zero length byte terminates the name.
 *
 * @param dis The input stream.
 * @param data The raw data (for cross references).
 * @return The domain name string, with each label passed through
 *         {@link IDN#toUnicode(String)} and labels separated by dots.
 * @throws IOException If the stream cannot be read.
 */
private static String readName(DataInputStream dis, byte[] data) throws IOException {
    int c = dis.readUnsignedByte();
    if ((c & 0xc0) == 0xc0) {
        c = ((c & 0x3f) << 8) + dis.readUnsignedByte();
        // The overload is given the offsets visited so far, starting with this one.
        HashSet<Integer> jumps = new HashSet<Integer>();
        jumps.add(c);
        return readName(data, c, jumps);
    }
    if (c == 0) {
        return "";
    }
    byte[] b = new byte[c];
    dis.readFully(b);
    // Decode explicitly as UTF-8 (a superset of ASCII) instead of relying on the platform
    // default charset that new String(byte[]) would use.
    String s = IDN.toUnicode(new String(b, java.nio.charset.StandardCharsets.UTF_8));
    String t = readName(dis, data);
    if (t.length() > 0) {
        s = s + "." + t;
    }
    return s;
}
@Override protected void decode(ChannelHandlerContext ctx, ByteBuf in, List<Object> out) throws Exception { if (!handshaken && in.readableBytes() >= 5) { String hostname = sniHostNameFromHandshakeInfo(in); if (hostname != null) { hostname = IDN.toASCII(hostname, IDN.ALLOW_UNASSIGNED).toLowerCase(Locale.US); } this.hostname = hostname; // the mapping will return default context when this.hostname is null selectedContext = mapping.map(hostname); } if (handshaken) { SslHandler sslHandler = selectedContext.newHandler(ctx.alloc()); ctx.pipeline().replace(this, SslHandler.class.getName(), sslHandler); } }
/**
 * Rewrites the domain part of an address to its IDN ToASCII form when it is not plain ASCII.
 *
 * <p>Nothing happens unless the address contains an {@code @} that is neither its first nor
 * its last character and {@code StringUtil.isAscii} rejects the text after it.
 *
 * @param addr the address to update in place
 */
public static void fixIDN(InternetAddress addr) {
    final String full = addr.getAddress();
    final int at = full.indexOf('@');
    if (at <= 0 || at >= full.length() - 1) {
        return;
    }
    final String domain = full.substring(at + 1);
    if (StringUtil.isAscii(domain)) {
        return;
    }
    final String asciiDomain = IDN.toASCII(domain);
    final String localPart = full.substring(0, at);
    addr.setAddress(localPart + "@" + asciiDomain);
}
/**
 * Assembles a {@link URL} from the parts of a {@code Uri}: scheme, host in IDN ToASCII form,
 * port when it is not {@code -1}, then the encoded path and encoded query when non-empty
 * (both appended through {@code encodeUriAppend}).
 *
 * @param uri the URI to convert
 * @return the URL built from the assembled string
 * @throws MalformedURLException if the assembled string is not a valid URL
 */
URL encodeUri(Uri uri) throws MalformedURLException {
    final StringBuilder target = new StringBuilder();

    target.append(uri.getScheme());
    target.append("://");
    target.append(IDN.toASCII(uri.getHost()));

    final int port = uri.getPort();
    if (port != -1) {
        target.append(':');
        target.append(port);
    }

    final String encodedPath = uri.getEncodedPath();
    if (!StringUtils.isEmpty(encodedPath)) {
        encodeUriAppend(target, encodedPath);
    }

    final String encodedQuery = uri.getEncodedQuery();
    if (!StringUtils.isEmpty(encodedQuery)) {
        target.append('?');
        encodeUriAppend(target, encodedQuery);
    }

    return new URL(target.toString());
}
private boolean matchDomain(final String domain) { // if we have a trailing dot the domain part we have an invalid email address. // the regular expression match would take care of this, but IDN.toASCII drops the trailing '.' if (domain.endsWith(".")) { return false; } String asciiString; try { asciiString = IDN.toASCII(domain); } catch (final IllegalArgumentException e) { return false; } if (asciiString.length() > MAX_DOMAIN_PART_LENGTH) { return false; } final MatchResult matcher = DOMAIN_PATTERN.exec(asciiString); return matcher != null; }
private static boolean isValidDomainAddress(final String domain, final RegExp pattern) { // if we have a trailing dot the domain part we have an invalid email address. // the regular expression match would take care of this, but IDN.toASCII drops the trailing '.' if (domain.endsWith(".")) { return false; } final MatchResult matcher = pattern.exec(domain); if (matcher == null) { return false; } String asciiString; try { asciiString = IDN.toASCII(domain); } catch (final IllegalArgumentException e) { return false; } return asciiString.length() <= MAX_DOMAIN_PART_LENGTH; }
/** * @tests {@link java.net.IDN#toUnicode(String)} * * @since 1.6 */ public void test_ToUnicode_LString() { try { IDN.toUnicode(null); fail("should throw NullPointerException"); } catch (NullPointerException e) { // expected } assertEquals("", IDN.toUnicode("")); assertEquals("www.bcher.de", IDN.toUnicode("www.bcher.de")); assertEquals("www.b\u00FCcher.de", IDN.toUnicode("www.b\u00FCcher.de")); assertEquals("www.\u65E5\u672C\u5E73.jp", IDN .toUnicode("www.\u65E5\u672C\u5E73.jp")); assertEquals("www.\u65E5\u672C\u5E73.jp", IDN.toUnicode("www\uFF0Exn--gwtq9nb2a\uFF61jp")); assertEquals("www.\u65E5\u672C\u5E73.jp", IDN.toUnicode("www.xn--gwtq9nb2a.jp")); }
public void testIsIDNtoASCIIBroken() { System.out.println(">>DomainValidatorTest.testIsIDNtoASCIIBroken()"); final String input = "."; final boolean ok = input.equals(IDN.toASCII(input)); System.out.println("IDN.toASCII is " + (ok? "OK" : "BROKEN")); String props[] = { "java.version", // Java Runtime Environment version "java.vendor", // Java Runtime Environment vendor "java.vm.specification.version", // Java Virtual Machine specification version "java.vm.specification.vendor", // Java Virtual Machine specification vendor "java.vm.specification.name", // Java Virtual Machine specification name "java.vm.version", // Java Virtual Machine implementation version "java.vm.vendor", // Java Virtual Machine implementation vendor "java.vm.name", // Java Virtual Machine implementation name "java.specification.version", // Java Runtime Environment specification version "java.specification.vendor", // Java Runtime Environment specification vendor "java.specification.name", // Java Runtime Environment specification name "java.class.version", // Java class format version number }; for(String t : props) { System.out.println(t + "=" + System.getProperty(t)); } System.out.println("<<DomainValidatorTest.testIsIDNtoASCIIBroken()"); }
/**
 * Parse a domain name starting at the current offset and moving the input
 * stream pointer past this domain name (even if cross references occur).
 *
 * <p>A length byte with both high bits set introduces a cross reference: it and the next
 * byte encode a 14-bit offset into {@code data}, resolved by the
 * {@code parse(data, offset, jumps)} overload. A zero length byte ends the name.
 *
 * @param dis The input stream.
 * @param data The raw data (for cross references).
 * @return The domain name string; each label is converted with
 *         {@link IDN#toUnicode(String)} and labels are joined by dots.
 * @throws IOException If reading from the stream fails.
 */
public static String parse(DataInputStream dis, byte[] data) throws IOException {
    int c = dis.readUnsignedByte();
    if ((c & 0xc0) == 0xc0) {
        c = ((c & 0x3f) << 8) + dis.readUnsignedByte();
        // Start the set of visited offsets with this first jump target.
        HashSet<Integer> jumps = new HashSet<Integer>();
        jumps.add(c);
        return parse(data, c, jumps);
    }
    if (c == 0) {
        return "";
    }
    byte[] b = new byte[c];
    dis.readFully(b);
    // new String(byte[]) would decode with the platform default charset; name the charset
    // so parsing gives the same result everywhere. UTF-8 includes the ASCII of ACE labels.
    String s = IDN.toUnicode(new String(b, java.nio.charset.StandardCharsets.UTF_8));
    String t = parse(dis, data);
    if (t.length() > 0) {
        s = s + "." + t;
    }
    return s;
}
@Override protected void decode(ChannelHandlerContext ctx, ByteBuf in, List<Object> out) throws Exception { if (!handshaken && in.readableBytes() >= 5) { this.hostname = sniHostNameFromHandshakeInfo(in); if (this.hostname != null) this.hostname = IDN.toASCII(this.hostname, IDN.ALLOW_UNASSIGNED).toLowerCase(Locale.US); else this.hostname = "localhost"; // the mapping will return default context when this.hostname is null this.selectedContext = this.man.findSslContextFactory(this.hostname); } if (handshaken) { SslHandler sslHandler = new SslHandler(this.selectedContext.getServerEngine(this.hostname)); ctx.pipeline().replace("ssl", "ssl", sslHandler); } }
/** * Returns the effective top-level domain plus one (eTLD+1) by referencing the public suffix list. * Returns null if the domain is a public suffix. * * <p>Here are some examples: <pre>{@code * assertEquals("google.com", getEffectiveTldPlusOne("google.com")); * assertEquals("google.com", getEffectiveTldPlusOne("www.google.com")); * assertNull(getEffectiveTldPlusOne("com")); * }</pre> * * @param domain A canonicalized domain. An International Domain Name (IDN) should be punycode * encoded. */ public String getEffectiveTldPlusOne(String domain) { if (domain == null) throw new NullPointerException("domain == null"); // We use UTF-8 in the list so we need to convert to Unicode. String unicodeDomain = IDN.toUnicode(domain); String[] domainLabels = unicodeDomain.split("\\."); String[] rule = findMatchingRule(domainLabels); if (domainLabels.length == rule.length && rule[0].charAt(0) != EXCEPTION_MARKER) { // The domain is a public suffix. return null; } int firstLabelOffset; if (rule[0].charAt(0) == EXCEPTION_MARKER) { // Exception rules hold the effective TLD plus one. firstLabelOffset = domainLabels.length - rule.length; } else { // Otherwise the rule is for a public suffix, so we must take one more label. firstLabelOffset = domainLabels.length - (rule.length + 1); } StringBuilder effectiveTldPlusOne = new StringBuilder(); String[] punycodeLabels = domain.split("\\."); for (int i = firstLabelOffset; i < punycodeLabels.length; i++) { effectiveTldPlusOne.append(punycodeLabels[i]).append('.'); } effectiveTldPlusOne.deleteCharAt(effectiveTldPlusOne.length() - 1); return effectiveTldPlusOne.toString(); }
/** * If {@code host} is an IP address, this returns the IP address in canonical form. * * <p>Otherwise this performs IDN ToASCII encoding and canonicalize the result to lowercase. For * example this converts {@code ☃.net} to {@code xn--n3h.net}, and {@code WwW.GoOgLe.cOm} to * {@code www.google.com}. {@code null} will be returned if the host cannot be ToASCII encoded or * if the result contains unsupported ASCII characters. */ public static String canonicalizeHost(String host) { // If the input contains a :, it’s an IPv6 address. if (host.contains(":")) { // If the input is encased in square braces "[...]", drop 'em. InetAddress inetAddress = host.startsWith("[") && host.endsWith("]") ? decodeIpv6(host, 1, host.length() - 1) : decodeIpv6(host, 0, host.length()); if (inetAddress == null) return null; byte[] address = inetAddress.getAddress(); if (address.length == 16) return inet6AddressToAscii(address); throw new AssertionError("Invalid IPv6 address: '" + host + "'"); } try { String result = IDN.toASCII(host).toLowerCase(Locale.US); if (result.isEmpty()) return null; // Confirm that the IDN ToASCII result doesn't contain any illegal characters. if (containsInvalidHostnameAsciiCodes(result)) { return null; } // TODO: implement all label limits. return result; } catch (IllegalArgumentException e) { return null; } }
/**
 * Attempts to convert a Unicode string to an ASCII string using IDN rules, with
 * {@link IDN#USE_STD3_ASCII_RULES} enabled. As of May 2014, the underlying Java function
 * implements IDNA2003.
 *
 * @param src String to convert.
 * @return String containing only ASCII characters on success, null on any failure.
 */
@CalledByNative
private static String idnToASCII(String src) {
    String converted;
    try {
        converted = IDN.toASCII(src, IDN.USE_STD3_ASCII_RULES);
    } catch (Exception e) {
        // Every failure, whatever its type, is reported to the caller as null.
        converted = null;
    }
    return converted;
}
/**
 * Normalize an email string for comparison.
 *
 * <p>The local part is lower-cased and the domain is converted with
 * {@link IDN#toASCII(String)} and lower-cased. Case mapping uses {@code Locale.ROOT} so the
 * result does not depend on the JVM's default locale (a Turkish default locale would
 * otherwise turn {@code I} into a dotless {@code ı}).
 *
 * @param email the address to normalize; may be {@code null}
 * @return the normalized address, or {@code null} if {@code email} is {@code null}
 * @throws RuntimeException if the address does not contain exactly one {@code @} or has
 *     nothing after it
 * @throws IllegalArgumentException if the domain cannot be converted by {@code IDN.toASCII}
 */
public static String normalizeEmail(String email) {
    if (email == null) {
        return null;
    }
    // Locate the separator by index instead of String.split, which silently discards
    // trailing empty parts and therefore accepted inputs such as "a@b@".
    int at = email.indexOf('@');
    boolean exactlyOneAt = at >= 0 && at == email.lastIndexOf('@');
    if (!exactlyOneAt || at == email.length() - 1) {
        throw new RuntimeException("Invalid email address.");
    }
    String localPart = email.substring(0, at);
    String domain = email.substring(at + 1);
    return localPart.toLowerCase(java.util.Locale.ROOT)
            + "@"
            + IDN.toASCII(domain).toLowerCase(java.util.Locale.ROOT);
}
/**
 * Tells whether the part of the input before the first {@code ':'} can be converted by
 * {@link IDN#toASCII(String)}.
 *
 * @param p_apply_1_ the text to check
 * @return {@code true} for an empty input, for an input that splits into no parts, or when
 *     the first part converts without error; {@code false} if the conversion throws
 *     {@link IllegalArgumentException}
 */
public boolean apply(String p_apply_1_) {
    if (p_apply_1_.length() == 0) {
        return true;
    }

    String[] parts = p_apply_1_.split(":");
    if (parts.length == 0) {
        return true;
    }

    try {
        // Only success or failure of the conversion matters; its result is not needed.
        IDN.toASCII(parts[0]);
        return true;
    } catch (IllegalArgumentException notConvertible) {
        return false;
    }
}
/** * @return IDN normalized hostname */ public String convert(final String hostname) { if(!PreferencesFactory.get().getBoolean("connection.hostname.idn")) { return StringUtils.strip(hostname); } if(StringUtils.isNotEmpty(hostname)) { try { // Convenience function that implements the IDNToASCII operation as defined in // the IDNA RFC. This operation is done on complete domain names, e.g: "www.example.com". // It is important to note that this operation can fail. If it fails, then the input // domain name cannot be used as an Internationalized Domain Name and the application // should have methods defined to deal with the failure. // IDNA.DEFAULT Use default options, i.e., do not process unassigned code points // and do not use STD3 ASCII rules If unassigned code points are found // the operation fails with ParseException final String idn = IDN.toASCII(StringUtils.strip(hostname)); if(log.isDebugEnabled()) { if(!StringUtils.equals(StringUtils.strip(hostname), idn)) { log.debug(String.format("IDN hostname for %s is %s", hostname, idn)); } } if(StringUtils.isNotEmpty(idn)) { return idn; } } catch(IllegalArgumentException e) { log.warn(String.format("Failed to convert hostname %s to IDNA", hostname), e); } } return StringUtils.strip(hostname); }
/** * Attempts to match the given {@link SNIServerName}. * * @param serverName * the {@link SNIServerName} instance on which this matcher * performs match operations * * @return {@code true} if, and only if, the matcher matches the * given {@code serverName} * * @throws NullPointerException if {@code serverName} is {@code null} * @throws IllegalArgumentException if {@code serverName} is * not of {@code StandardConstants#SNI_HOST_NAME} type * * @see SNIServerName */ @Override public boolean matches(SNIServerName serverName) { if (serverName == null) { throw new NullPointerException( "The SNIServerName argument cannot be null"); } SNIHostName hostname; if (!(serverName instanceof SNIHostName)) { if (serverName.getType() != StandardConstants.SNI_HOST_NAME) { throw new IllegalArgumentException( "The server name type is not host_name"); } try { hostname = new SNIHostName(serverName.getEncoded()); } catch (NullPointerException | IllegalArgumentException e) { return false; } } else { hostname = (SNIHostName)serverName; } // Let's first try the ascii name matching String asciiName = hostname.getAsciiName(); if (pattern.matcher(asciiName).matches()) { return true; } // May be an internationalized domain name, check the Unicode // representations. return pattern.matcher(IDN.toUnicode(asciiName)).matches(); }
/**
 * Builds an {@code InternetDomainName} from the {@link IDN#toUnicode(String, int)} form of the
 * host (with {@link IDN#USE_STD3_ASCII_RULES}).
 *
 * <p>The host is also run through the UTS #46 {@code nameToASCII} conversion; its output is
 * discarded, the call is made to populate {@code info}.
 *
 * @param decodedHost the host name to convert
 * @param info will have the errors and transitional differences set if
 *     appropriate.
 * @return the domain name created from the Unicode form of the host
 */
static InternetDomainName toDomainName(String decodedHost, IDNA.Info info) {
  final String unicodeForm = IDN.toUnicode(decodedHost, IDN.USE_STD3_ASCII_RULES);

  final IDNA uts46 = IDNA.getUTS46Instance(IDNA.DEFAULT);
  final StringBuilder scratch = new StringBuilder(decodedHost.length() + 16);
  // Only the contents written to info are of interest here.
  uts46.nameToASCII(decodedHost, scratch, info);

  return InternetDomainName.from(unicodeForm);
}