Java 类java.text.Normalizer 实例源码
项目:solo-spring
文件:URICoder.java
/**
 * URI-encodes a string after NFKC-normalizing it and converting it to UTF-8.
 *
 * <p>Every UTF-8 byte accepted by {@code isUnreserved}, or equal to {@code e},
 * is copied to the output as a single character; every other byte is handed
 * to {@code appendEscape}.
 *
 * @param s
 *            The string to encode (it may contain non-ASCII characters)
 * @param e
 *            A character that does not require encoding if found in the
 *            string.
 * @return the encoded string
 */
private static String encode_UTF8(String s, char e) {
    // TODO: Normalizer requires Java 6!
    final String normalized;
    if (Normalizer.isNormalized(s, Form.NFKC)) {
        normalized = s;
    } else {
        normalized = Normalizer.normalize(s, Form.NFKC);
    }

    // convert String to UTF-8
    final ByteBuffer utf8Bytes = UTF8.encode(normalized);

    // URI encode, one byte at a time
    final StringBuffer out = new StringBuffer();
    while (utf8Bytes.hasRemaining()) {
        // mask to get the unsigned value of the byte
        final int octet = utf8Bytes.get() & 0xff;
        final boolean keepLiteral = isUnreserved(octet) || octet == e;
        if (keepLiteral) {
            out.append((char) octet);
        } else {
            appendEscape(out, (byte) octet);
        }
    }
    return out.toString();
}
项目:solo-spring
文件:URICoder.java
/**
 * URI-encodes a string after NFKC-normalizing it and converting it to UTF-8,
 * escaping only the bytes that {@code isLegal} rejects.
 *
 * @param s
 *            The string to encode (it may contain non-ASCII characters)
 * @return the encoded string
 */
private static String minimalEncode_UTF8(String s) {
    // TODO: Normalizer requires Java 6!
    final String normalized;
    if (Normalizer.isNormalized(s, Form.NFKC)) {
        normalized = s;
    } else {
        normalized = Normalizer.normalize(s, Form.NFKC);
    }

    // convert String to UTF-8
    final ByteBuffer utf8Bytes = UTF8.encode(normalized);

    // URI encode, one byte at a time
    final StringBuffer out = new StringBuffer();
    while (utf8Bytes.hasRemaining()) {
        // mask to get the unsigned value of the byte
        final int octet = utf8Bytes.get() & 0xff;
        if (!isLegal(octet)) {
            appendEscape(out, (byte) octet);
        } else {
            out.append((char) octet);
        }
    }
    return out.toString();
}
项目:OpenJSharp
文件:NormalizerBase.java
/**
 * Tests whether a string is in the given normalization form.
 * This is semantically equivalent to source.equals(normalize(source, mode)).
 *
 * Unlike quickCheck(), this function returns a definitive result,
 * never a "maybe".
 * For NFD, NFKD, and FCD, both functions work exactly the same.
 * For NFC and NFKC where quickCheck may return "maybe", this function will
 * perform further tests to arrive at a true/false result.
 *
 * @param str the input string to be checked to see if it is normalized
 * @param form the normalization form
 * @param options the optional features to be enabled.
 * @return true when the check for the selected form yields YES
 * @throws IllegalArgumentException if no case matches {@code form}
 */
public static boolean isNormalized(String str, Normalizer.Form form, int options) {
    // Every case inspects the same character range, so compute it once.
    final char[] chars = str.toCharArray();
    final int limit = str.length();

    switch (form) {
    case NFC:
        return YES == NFC.quickCheck(chars, 0, limit, false, NormalizerImpl.getNX(options));
    case NFD:
        return YES == NFD.quickCheck(chars, 0, limit, false, NormalizerImpl.getNX(options));
    case NFKC:
        return YES == NFKC.quickCheck(chars, 0, limit, false, NormalizerImpl.getNX(options));
    case NFKD:
        return YES == NFKD.quickCheck(chars, 0, limit, false, NormalizerImpl.getNX(options));
    }

    throw new IllegalArgumentException("Unexpected normalization form: " +
                                       form);
}
项目:OpenJSharp
文件:CDataTransferer.java
/**
 * Translates transferred bytes, with two special cases handled before
 * delegating to the superclass: a {@code CF_URL} payload requested as a
 * {@link URL} is decoded and returned directly, and a {@code CF_STRING}
 * payload is re-encoded as NFC-normalized text first.
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor,
                             long format, Transferable transferable) throws IOException {
    final boolean urlRequested = format == CF_URL
            && URL.class.equals(flavor.getRepresentationClass());
    if (urlRequested) {
        String charset = getDefaultTextCharset();
        final boolean encodingAdvertised = transferable != null
                && transferable.isDataFlavorSupported(javaTextEncodingFlavor);
        if (encodingAdvertised) {
            try {
                final byte[] encodingName =
                        (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                charset = new String(encodingName, "UTF-8");
            } catch (UnsupportedFlavorException cannotHappen) {
                // The flavor was reported as supported just above; keep the default charset.
            }
        }
        return new URL(new String(bytes, charset));
    }

    byte[] payload = bytes;
    if (format == CF_STRING) {
        final String text = new String(bytes, "UTF8");
        payload = Normalizer.normalize(text, Form.NFC).getBytes("UTF8");
    }
    return super.translateBytes(payload, flavor, format, transferable);
}
项目:OperatieBRP
文件:ZoekCriterium.java
/**
 * Stores the search value and derives the search flags from it.
 *
 * <p>A leading backslash marks the value as exact, a trailing {@code *} marks
 * it as a wildcard; in both cases the marker is removed from
 * {@code slimZoekenWaarde}. Unless the criterion is exact, the value is also
 * checked for upper-case ASCII letters and for combining diacritical marks.
 *
 * @param waarde the raw search value, may be null
 */
public void setWaarde(final String waarde) {
    this.waarde = waarde;
    if (waarde == null) {
        this.slimZoekenWaarde = null;
        return;
    }

    if (waarde.startsWith("\\")) {
        this.exact = true;
        this.slimZoekenWaarde = waarde.substring(1);
    } else if (waarde.endsWith("*")) {
        this.wildcard = true;
        this.slimZoekenWaarde = waarde.substring(0, waarde.length() - 1);
    } else {
        this.slimZoekenWaarde = waarde;
    }

    if (this.exact) {
        return;
    }

    // No upper-case ASCII letter in a string attribute: search case-insensitively.
    final boolean hasUpperCase = waarde.matches(".*[A-Z].*");
    if (!hasUpperCase && attribuut.isString()) {
        this.caseInsensitive = true;
    }

    // NFD splits accented characters into base + combining mark, so a match means a diacritic is present.
    final String decomposed = Normalizer.normalize(waarde, Normalizer.Form.NFD);
    final boolean hasDiacritic =
            Pattern.compile("\\p{InCombiningDiacriticalMarks}+").matcher(decomposed).find();
    if (hasDiacritic) {
        this.diakriet = true;
    }
}
项目:OperatieBRP
文件:Utils.java
/**
 * Converts a name to a Java enum constant name.
 *
 * <p>Names starting with {@code LITERAL} have that prefix removed and are then
 * passed through {@code StringEscapeUtils.unescapeJava}. All other names are
 * NFD-normalized, get whitespace and hyphens replaced by underscores, are
 * upper-cased, are stripped of everything outside {@code A-Z0-9_}, and have
 * runs of underscores collapsed.
 *
 * @param javaNameBase the name to convert
 * @return the enum constant name
 */
public static String convertToJavaEnumName(final String javaNameBase) {
    if (javaNameBase.startsWith(LITERAL)) {
        // Strip the prefix by length: building a regex from LITERAL would misbehave on metacharacters.
        return StringEscapeUtils.unescapeJava(javaNameBase.substring(LITERAL.length()));
    }

    // Unaccent: decompose so the marks are removed by the A-Z0-9_ filter below
    String result = Normalizer.normalize(javaNameBase, Normalizer.Form.NFD);
    // Replace whitespace and hyphens with underscore
    result = result.replaceAll("(\\s|-)", "_");
    // Uppercase with a fixed locale; the default locale may map 'i' to a non-ASCII letter
    result = result.toUpperCase(java.util.Locale.ROOT);
    // Remove unsupported characters
    result = result.replaceAll("[^A-Z0-9_]", "");
    // Remove duplicate separators
    result = result.replaceAll("_{2,}", "_");
    return result;
}
项目:jdk8u-jdk
文件:NormalizerBase.java
/**
 * Tests whether a string is in the given normalization form.
 * This is semantically equivalent to source.equals(normalize(source, mode)).
 *
 * Unlike quickCheck(), this function returns a definitive result,
 * never a "maybe".
 * For NFD, NFKD, and FCD, both functions work exactly the same.
 * For NFC and NFKC where quickCheck may return "maybe", this function will
 * perform further tests to arrive at a true/false result.
 *
 * @param str the input string to be checked to see if it is normalized
 * @param form the normalization form
 * @param options the optional features to be enabled.
 * @return true when the check for the selected form yields YES
 * @throws IllegalArgumentException if no case matches {@code form}
 */
public static boolean isNormalized(String str, Normalizer.Form form, int options) {
    // Every case inspects the same character range, so compute it once.
    final char[] chars = str.toCharArray();
    final int limit = str.length();

    switch (form) {
    case NFC:
        return YES == NFC.quickCheck(chars, 0, limit, false, NormalizerImpl.getNX(options));
    case NFD:
        return YES == NFD.quickCheck(chars, 0, limit, false, NormalizerImpl.getNX(options));
    case NFKC:
        return YES == NFKC.quickCheck(chars, 0, limit, false, NormalizerImpl.getNX(options));
    case NFKD:
        return YES == NFKD.quickCheck(chars, 0, limit, false, NormalizerImpl.getNX(options));
    }

    throw new IllegalArgumentException("Unexpected normalization form: " +
                                       form);
}
项目:bibliome-java-utils
文件:Strings.java
/**
 * Removes diacritics from the specified string.
 *
 * <p>The string is NFD-normalized and every character belonging to the
 * Combining Diacritical Marks or Combining Diacritical Marks Supplement
 * block is dropped.
 *
 * @param s the string to strip
 * @return the NFD form of the specified string with diacritics removed.
 */
public static final String removeDiacritics(String s) {
    final String decomposed = Normalizer.normalize(s, Form.NFD);
    final StringBuilder kept = new StringBuilder(decomposed.length());
    for (char ch : decomposed.toCharArray()) {
        final UnicodeBlock block = UnicodeBlock.of(ch);
        final boolean isMark = UnicodeBlock.COMBINING_DIACRITICAL_MARKS.equals(block)
                || UnicodeBlock.COMBINING_DIACRITICAL_MARKS_SUPPLEMENT.equals(block);
        if (!isMark) {
            kept.append(ch);
        }
    }
    // Nothing dropped: hand back the normalized string itself.
    return kept.length() == decomposed.length() ? decomposed : kept.toString();
}
项目:mapr-music
文件:SlugUtil.java
/**
 * Converts the specified string to its slug representation, which can be used to generate
 * readable and SEO-friendly URLs.
 *
 * <p>Steps, in order: transliterate, replace {@code WHITESPACE} matches with dashes,
 * NFD-normalize, delete {@code NONLATIN} matches, delete {@code EDGESDHASHES} matches,
 * lower-case with {@link Locale#ENGLISH}.
 *
 * @param input string, which will be converted.
 * @return slug representation of the string.
 */
public static String toSlug(String input) {
    final String dashed = WHITESPACE.matcher(transliterator.transform(input)).replaceAll("-");
    final String decomposed = Normalizer.normalize(dashed, Normalizer.Form.NFD);
    final String latinOnly = NONLATIN.matcher(decomposed).replaceAll("");
    final String trimmed = EDGESDHASHES.matcher(latinOnly).replaceAll("");
    return trimmed.toLowerCase(Locale.ENGLISH);
}
项目:mapr-music
文件:SlugService.java
/**
 * Converts the specified string to its slug representation, which can be used to generate
 * readable and SEO-friendly URLs.
 *
 * <p>Steps, in order: transliterate, replace {@code WHITESPACE} matches with dashes,
 * NFD-normalize, delete {@code NONLATIN} matches, delete {@code EDGESDHASHES} matches,
 * lower-case with {@link Locale#ENGLISH}.
 *
 * @param input string, which will be converted.
 * @return slug representation of the string.
 */
public String toSlug(String input) {
    final String dashed = WHITESPACE.matcher(transliterator.transform(input)).replaceAll("-");
    final String decomposed = Normalizer.normalize(dashed, Normalizer.Form.NFD);
    final String latinOnly = NONLATIN.matcher(decomposed).replaceAll("");
    final String trimmed = EDGESDHASHES.matcher(latinOnly).replaceAll("");
    return trimmed.toLowerCase(Locale.ENGLISH);
}
项目:TensorFlowDetector-App
文件:MainActivity.java
/**
 * Reduces the input to ASCII letters and spaces.
 *
 * <p>The input is NFD-normalized first, so an accented letter contributes its
 * base letter; every character other than {@code a-z}, {@code A-Z} and the
 * space is then removed.
 *
 * @param input the text to clean
 * @return the text containing only ASCII letters and spaces
 */
private String processData(String input) {
    return Normalizer.normalize(input, Normalizer.Form.NFD)
            .replaceAll("[^a-zA-Z ]", "");
}
项目:devops-cstack
文件:AlphaNumericsCharactersCheckUtils.java
/**
 * Strips accents and most punctuation from a value, logging it before and after.
 *
 * <p>After NFD normalization the combining diacritical marks are removed, then
 * everything except {@code - _ a-z A-Z 0-9} and whitespace, and finally the
 * space character. Note that only {@code ' '} is removed in the last step, so
 * other whitespace such as tabs survives.
 *
 * @param value the value to clean
 * @return the cleaned value
 */
public static String convertToAlphaNumerics(String value) {
    logger.debug("Before : " + value);
    final String cleaned = Normalizer.normalize(value, Form.NFD)
            .replaceAll("[\\p{InCombiningDiacriticalMarks}]", "")
            .replaceAll("[^-_a-zA-Z0-9\\s]", "")
            .replace(" ", "");
    logger.debug("After : " + cleaned);
    return cleaned;
}
项目:devops-cstack
文件:AlphaNumericsCharactersCheckUtils.java
/**
 * Removes combining diacritical marks from a value, logging it before and after.
 *
 * @param value the value to strip accents from
 * @return the NFD-normalized value without combining diacritical marks
 */
public static String deAccent(String value) {
    logger.debug("Before : " + value);
    final String decomposed = Normalizer.normalize(value, Form.NFD);
    final String stripped = Pattern.compile("\\p{InCombiningDiacriticalMarks}+")
            .matcher(decomposed)
            .replaceAll("");
    logger.debug("After : " + stripped);
    return stripped;
}
项目:devops-cstack
文件:Snapshot.java
/**
 * Stores the tag reduced to lower-case ASCII letters and digits.
 *
 * <p>A non-null tag is lower-cased, NFD-normalized, stripped of combining
 * diacritical marks and then of every character outside {@code a-z0-9}.
 * A null tag is stored as null.
 *
 * @param tag the raw tag, may be null
 */
public void setTag(String tag) {
    if (tag != null) {
        // Fixed locale: with a Turkish default locale 'I' would lower-case to a dotless i
        // and then be deleted by the a-z0-9 filter below.
        tag = tag.toLowerCase(java.util.Locale.ROOT);
        tag = Normalizer.normalize(tag, Normalizer.Form.NFD);
        tag = tag.replaceAll("[\\p{InCombiningDiacriticalMarks}]", "");
        tag = tag.replaceAll("[^a-z0-9]", "");
    }
    this.tag = tag;
}
项目:devops-cstack
文件:Snapshot.java
/**
 * Stores the full tag reduced to lower-case ASCII letters, digits and hyphens.
 *
 * <p>A non-null value is lower-cased, NFD-normalized, stripped of combining
 * diacritical marks and then of every character outside {@code a-z0-9-}.
 * A null value is stored as null.
 *
 * @param fullTag the raw full tag, may be null
 */
public void setFullTag(String fullTag) {
    if (fullTag != null) {
        // Fixed locale: with a Turkish default locale 'I' would lower-case to a dotless i
        // and then be deleted by the a-z0-9- filter below.
        fullTag = fullTag.toLowerCase(java.util.Locale.ROOT);
        fullTag = Normalizer.normalize(fullTag, Normalizer.Form.NFD);
        fullTag = fullTag.replaceAll("[\\p{InCombiningDiacriticalMarks}]", "");
        fullTag = fullTag.replaceAll("[^a-z0-9-]", "");
    }
    this.fullTag = fullTag;
}
项目:devops-cstack
文件:AlphaNumericsCharactersCheckUtils.java
/**
 * Reduces a value to ASCII letters and digits, falling back to a generated name.
 *
 * <p>The value is re-decoded (its ISO-8859-1 bytes read as UTF-8), NFD-normalized,
 * stripped of non-ASCII characters, then of everything except letters, digits and
 * whitespace, and finally of spaces. If nothing is left, {@code "default"} followed
 * by {@code countApp} is returned.
 *
 * @param value the value to clean
 * @param countApp number appended to the fallback name
 * @return the cleaned value, or the fallback name when the cleaned value is empty
 * @throws UnsupportedEncodingException if a named charset is not supported
 */
public static String convertToAlphaNumerics(String value, Integer countApp)
        throws UnsupportedEncodingException {
    final byte[] latin1Bytes = value.getBytes("ISO-8859-1");
    final String redecoded = new String(latin1Bytes, "UTF-8");
    final String asciiOnly = Normalizer.normalize(redecoded, Form.NFD)
            .replaceAll("[^\\p{ASCII}]", "");
    final String alphanumeric = asciiOnly
            .replaceAll("[^a-zA-Z0-9\\s]", "")
            .replace(" ", "");
    if (alphanumeric.isEmpty()) {
        return "default" + countApp;
    }
    return alphanumeric;
}
项目:directory-ldap-api
文件:PrepareString.java
/**
 * Returns the NFKC form of a String.
 *
 * @param value the value to normalize
 * @return {@code value} itself when it is already in NFKC form, otherwise its
 *         NFKC-normalized copy
 */
public static String normalize( String value )
{
    // Checking first avoids creating a new String for input that is already normalized.
    return Normalizer.isNormalized( value, Normalizer.Form.NFKC )
        ? value
        : Normalizer.normalize( value, Normalizer.Form.NFKC );
}
项目:alfresco-repository
文件:NameBasedUserNameGenerator.java
/**
 * Reduces a name to lower-case ASCII letters, digits and underscores.
 *
 * <p>The name is trimmed and lower-cased, whitespace runs become a single
 * underscore, accented characters are decomposed with NFD, and everything
 * outside {@code a-z A-Z 0-9 _} is removed.
 *
 * @param name the name to cleanse
 * @return the cleansed name
 */
private String cleanseName(String name)
{
    // Replace whitespace with _ ; a fixed locale keeps 'I' mapping to ASCII 'i'
    String result = name.trim().toLowerCase(java.util.Locale.ROOT).replaceAll("\\s+", "_");

    // Remove accents from characters and strip out non-alphanumeric chars.
    // The range must be A-Z: "A-z" also admits [ \ ] ^ and ` which lie between 'Z' and 'a'.
    return Normalizer.normalize(result, Normalizer.Form.NFD).replaceAll("[^a-zA-Z0-9_]+", "");
}
项目:chromium-net-for-android
文件:NetStringUtil.java
/**
 * Attempts to convert text in a given character set to a Unicode string
 * and NFC-normalize it. Returns null on failure.
 * @param text ByteBuffer containing the character array to convert.
 * @param charsetName Character set the text is encoded in.
 * @return Unicode string on success, null on failure.
 */
@CalledByNative
private static String convertToUnicodeAndNormalize(
        ByteBuffer text,
        String charsetName) {
    final String decoded = convertToUnicode(text, charsetName);
    // A null from the conversion step is passed through unchanged.
    return decoded == null
            ? null
            : Normalizer.normalize(decoded, Normalizer.Form.NFC);
}
项目:idea-php-typo3-plugin
文件:Slugify.java
/**
 * Applies the slug normalization pipeline to the input.
 *
 * <p>The input is NFKD-normalized; matches of {@code PATTERN_NORMALIZE_NON_ASCII}
 * are replaced with {@code EMPTY}, matches of {@code PATTERN_NORMALIZE_SEPARATOR}
 * with {@code "_"} or {@code "-"} depending on {@code underscoreSeparator}, and
 * matches of {@code PATTERN_NORMALIZE_TRIM_DASH} with {@code EMPTY}.
 *
 * @param input the text to normalize
 * @return the normalized text
 */
private String normalize(final String input) {
    final String decomposed = Normalizer.normalize(input, Normalizer.Form.NFKD);
    final String withoutNonAscii = PATTERN_NORMALIZE_NON_ASCII.matcher(decomposed).replaceAll(EMPTY);
    final String separator = underscoreSeparator ? "_" : "-";
    final String separated = PATTERN_NORMALIZE_SEPARATOR.matcher(withoutNonAscii).replaceAll(separator);
    return PATTERN_NORMALIZE_TRIM_DASH.matcher(separated).replaceAll(EMPTY);
}
项目:armadillo
文件:HkdfMessageDigest.java
/**
 * Derives a hex-encoded value from a message and a usage name using
 * HKDF with HMAC-SHA512.
 *
 * <p>Both strings are converted to bytes with NFKD normalization; the message
 * is the input keying material and the usage name is the info parameter.
 *
 * @param providedMessage the message to derive from, must not be null
 * @param usageName the usage label, must not be null
 * @return the derived bytes, hex-encoded
 * @throws NullPointerException if either argument is null
 */
@Override
public String derive(String providedMessage, String usageName) {
    Objects.requireNonNull(providedMessage);
    Objects.requireNonNull(usageName);

    final byte[] keyingMaterial = Bytes.from(providedMessage, Normalizer.Form.NFKD).array();
    final byte[] info = Bytes.from(usageName, Normalizer.Form.NFKD).array();
    final byte[] derived = HKDF.fromHmacSha512().extractAndExpand(salt, keyingMaterial, info, outLength);
    return Bytes.wrap(derived).encodeHex();
}
项目:armadillo
文件:DefaultEncryptionProtocol.java
/**
 * Derives the content encryption key with HKDF (HMAC-SHA512).
 *
 * <p>The input keying material is the fingerprint, followed by the content salt,
 * followed by the NFKD-normalized content key. The preference salt is the HKDF
 * salt and the output length is {@code keyLengthBit / 8} bytes.
 *
 * @param contentKey the key name of the content
 * @param fingerprint the fingerprint bytes
 * @param contentSalt salt for this content; also passed to the key stretching function
 * @param preferenceSalt salt used for the HKDF extract step
 * @param password optional user password, may be null
 * @return the derived key bytes
 */
private byte[] keyDerivationFunction(String contentKey, byte[] fingerprint, byte[] contentSalt, byte[] preferenceSalt, @Nullable char[] password) {
    final Bytes saltedFingerprint = Bytes.wrap(fingerprint).append(contentSalt);
    Bytes ikm = saltedFingerprint.append(Bytes.from(contentKey, Normalizer.Form.NFKD));

    if (password != null) {
        // NOTE(review): the value returned by append() is discarded here. If Bytes.append
        // returns a new instance instead of mutating ikm, the stretched password never
        // reaches the key material - confirm against the Bytes API before changing this,
        // since a change alters every derived key.
        ikm.append(keyStretchingFunction.stretch(contentSalt, password, 32));
    }

    final byte[] info = "DefaultEncryptionProtocol".getBytes();
    return HKDF.fromHmacSha512().extractAndExpand(preferenceSalt, ikm.array(), info, keyLengthBit / 8);
}
项目:ARCLib
文件:Utils.java
/**
 * Removes accents, and any other non-ASCII character, from a string.
 *
 * <p>The string is NFD-normalized so that accented letters split into a base
 * letter and combining marks, then every non-ASCII character is deleted.
 *
 * @param s the string to strip, may be null
 * @return the ASCII-only string, or null when {@code s} is null
 */
public static String stripAccents(String s) {
    if (s == null) {
        return null;
    }
    return Normalizer.normalize(s, Normalizer.Form.NFD)
            .replaceAll("[^\\p{ASCII}]", "");
}
项目:CommentView
文件:Validator.java
/**
 * Computes the length of a tweet text.
 *
 * <p>The text is NFC-normalized and counted in code points. For each URL the
 * extractor finds, its own span is subtracted and replaced by
 * {@code shortUrlLengthHttps} (URL starts with {@code https://}, ignoring case)
 * or {@code shortUrlLength}.
 *
 * @param text the tweet text
 * @return the computed length
 */
public int getTweetLength(String text) {
    final String composed = Normalizer.normalize(text, Normalizer.Form.NFC);
    int length = composed.codePointCount(0, composed.length());

    for (Extractor.Entity urlEntity : extractor.extractURLsWithIndices(composed)) {
        // start - end is the negated span of the URL, so this removes its real length.
        length += urlEntity.start - urlEntity.end;
        final boolean secure = urlEntity.value.toLowerCase().startsWith("https://");
        if (secure) {
            length += shortUrlLengthHttps;
        } else {
            length += shortUrlLength;
        }
    }
    return length;
}
项目:CommentView
文件:Validator.java
/**
 * Computes the length of a tweet text.
 *
 * <p>The text is NFC-normalized and counted in code points. For each URL the
 * extractor finds, its own span is subtracted and replaced by
 * {@code shortUrlLengthHttps} (URL starts with {@code https://}, ignoring case)
 * or {@code shortUrlLength}.
 *
 * @param text the tweet text
 * @return the computed length
 */
public int getTweetLength(String text) {
    final String composed = Normalizer.normalize(text, Normalizer.Form.NFC);
    int length = composed.codePointCount(0, composed.length());

    for (Extractor.Entity urlEntity : extractor.extractURLsWithIndices(composed)) {
        // start - end is the negated span of the URL, so this removes its real length.
        length += urlEntity.start - urlEntity.end;
        final boolean secure = urlEntity.value.toLowerCase().startsWith("https://");
        if (secure) {
            length += shortUrlLengthHttps;
        } else {
            length += shortUrlLength;
        }
    }
    return length;
}
项目:creacoinj
文件:BIP38PrivateKey.java
/**
 * Decrypts this key with the given passphrase.
 *
 * <p>The passphrase is NFC-normalized before use. The decrypted key is accepted
 * only when the first four bytes of the double SHA-256 of its address string
 * equal {@code addressHash}.
 *
 * @param passphrase the passphrase to decrypt with
 * @return the decrypted key
 * @throws BadPassphraseException if the address hash does not match
 */
public ECKey decrypt(String passphrase) throws BadPassphraseException {
    final String normalizedPassphrase = Normalizer.normalize(passphrase, Normalizer.Form.NFC);

    final ECKey key;
    if (ecMultiply) {
        key = decryptEC(normalizedPassphrase);
    } else {
        key = decryptNoEC(normalizedPassphrase);
    }

    final byte[] addressBytes = key.toAddress(params).toString().getBytes(Charsets.US_ASCII);
    final byte[] actualAddressHash =
            Arrays.copyOfRange(Sha256Hash.twiceOf(addressBytes).getBytes(), 0, 4);
    if (Arrays.equals(actualAddressHash, addressHash)) {
        return key;
    }
    throw new BadPassphraseException();
}
项目:cyberduck
文件:NFCNormalizer.java
/**
 * Returns the NFC form of a name.
 *
 * @param name the name to normalize
 * @return {@code name} itself when it is already in NFC form, otherwise its
 *         NFC-normalized copy
 */
public CharSequence normalize(final CharSequence name) {
    if(Normalizer.isNormalized(name, Normalizer.Form.NFC)) {
        return name;
    }
    // Canonical decomposition followed by canonical composition (default)
    final String composed = Normalizer.normalize(name, Normalizer.Form.NFC);
    if(log.isDebugEnabled()) {
        log.debug(String.format("Normalized string %s to %s", name, composed));
    }
    return composed;
}
项目:bytes-java
文件:BytesConstructorTests.java
/**
 * Verifies the {@code Bytes.from} string constructors for one string and charset.
 *
 * <p>The charset-based constructor is always checked. For UTF-8 the
 * normalization-based constructor is checked against NFKD as well; for any
 * other charset the default (UTF-8) constructor is checked instead.
 *
 * @param string the string under test
 * @param charset the charset to encode with
 */
private void checkString(String string, Charset charset) {
    final Bytes encoded = Bytes.from(string, charset);
    final byte[] expected = string.getBytes(charset);
    assertArrayEquals(expected, encoded.array());
    assertEquals(new String(string.getBytes(charset), charset), encoded.encodeCharset(charset));

    if (charset == StandardCharsets.UTF_8) {
        final Bytes normalized = Bytes.from(string, Normalizer.Form.NFKD);
        final String nfkd = Normalizer.normalize(string, Normalizer.Form.NFKD);
        assertArrayEquals(nfkd.getBytes(charset), normalized.array());
    } else {
        final Bytes utf8 = Bytes.from(string);
        final byte[] expectedUtf8 = string.getBytes(StandardCharsets.UTF_8);
        assertArrayEquals(expectedUtf8, utf8.array());
        assertEquals(new String(string.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8), utf8.encodeUtf8());
    }
}
项目:commons-sandbox
文件:JavaNormalization.java
/**
 * Demonstrates NFKD normalization: prints whether a sample string is
 * normalized and the string itself, before and after normalizing it.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final Normalizer.Form form = Normalizer.Form.NFKD;
    final String original = "São Paulo";
    System.out.println(Normalizer.isNormalized(original, form));
    System.out.println(original);

    final String decomposed = Normalizer.normalize(original, form);
    System.out.println(Normalizer.isNormalized(decomposed, form));
    System.out.println(decomposed);
    // TODO: how can I print the difference?
}
项目:OpenJSharp
文件:RegularFileObject.java
/**
 * Checks whether this file's name matches a class name plus the extension
 * of the given kind.
 *
 * <p>Besides an exact match, two lenient matches are accepted: on Mac OS, an
 * NFD file name whose NFC form equals the expected name (the stored name is
 * then replaced by the NFC form), and a case-insensitive match that is
 * confirmed against the canonical file name.
 */
@Override
public boolean isNameCompatible(String cn, JavaFileObject.Kind kind) {
    // null check
    cn.getClass();
    if (kind == Kind.OTHER && getKind() != kind) {
        return false;
    }

    final String expected = cn + kind.extension;
    if (name.equals(expected)) {
        return true;
    }

    final boolean differsByNormalizationOnly = isMacOS
            && Normalizer.isNormalized(name, Normalizer.Form.NFD)
            && Normalizer.isNormalized(expected, Normalizer.Form.NFC);
    if (differsByNormalizationOnly) {
        // On Mac OS X it is quite possible to have the file name and the class
        // name normalized in a different way - in that case we have to normalize
        // the file name to the Normal Form Composed (NFC)
        final String normName = Normalizer.normalize(name, Normalizer.Form.NFC);
        if (normName.equals(expected)) {
            this.name = normName;
            return true;
        }
    }

    if (!name.equalsIgnoreCase(expected)) {
        return false;
    }
    try {
        // allow for Windows
        return file.getCanonicalFile().getName().equals(expected);
    } catch (IOException e) {
        // the canonical name could not be resolved; treat as not compatible
        return false;
    }
}
项目:OpenJSharp
文件:Pattern.java
/**
 * Rewrites the pattern so that it matches canonical equivalences.
 *
 * The pattern is converted to NFD (canonical decomposition) form; each code
 * point that is followed by non-spacing marks is then replaced by a
 * non-capturing group built by produceEquivalentAlternation. The result
 * is stored back into normalizedPattern.
 */
private void normalize() {
// NOTE(review): inCharClass is assigned here but never read in this method.
boolean inCharClass = false;
// Code point handled by the previous iteration; -1 until the first one is done.
int lastCodePoint = -1;
// Convert pattern into NFD form
normalizedPattern = Normalizer.normalize(pattern, Normalizer.Form.NFD);
patternLength = normalizedPattern.length();
// Modify pattern to match canonical equivalences
StringBuilder newPattern = new StringBuilder(patternLength);
// i is advanced inside the body, by code point width rather than by char.
for(int i=0; i<patternLength; ) {
int c = normalizedPattern.codePointAt(i);
StringBuilder sequenceBuffer;
// A non-spacing mark that has a predecessor starts a combining sequence.
if ((Character.getType(c) == Character.NON_SPACING_MARK)
&& (lastCodePoint != -1)) {
// The sequence begins with the preceding code point and this mark.
sequenceBuffer = new StringBuilder();
sequenceBuffer.appendCodePoint(lastCodePoint);
sequenceBuffer.appendCodePoint(c);
// Consume the following code points while the current one is a mark.
// Note that the code point ending the run is appended to the buffer
// before the loop condition rejects it.
while(Character.getType(c) == Character.NON_SPACING_MARK) {
i += Character.charCount(c);
if (i >= patternLength)
break;
c = normalizedPattern.codePointAt(i);
sequenceBuffer.appendCodePoint(c);
}
String ea = produceEquivalentAlternation(
sequenceBuffer.toString());
// Cut as many chars from the end of the output as lastCodePoint occupies
// (presumably the base character appended by the previous iteration),
// then emit the alternation as a non-capturing group in its place.
newPattern.setLength(newPattern.length()-Character.charCount(lastCodePoint));
newPattern.append("(?:").append(ea).append(")");
} else if (c == '[' && lastCodePoint != '\\') {
// '[' not preceded by a backslash: normalizeCharClass writes into
// newPattern and returns the index to continue from.
i = normalizeCharClass(newPattern, i);
} else {
// Any other code point is copied through unchanged.
newPattern.appendCodePoint(c);
}
// Common tail for all branches: remember c and step past it.
lastCodePoint = c;
i += Character.charCount(c);
}
normalizedPattern = newPattern.toString();
}
项目:OpenJSharp
文件:Pattern.java
/**
 * Attempts to compose input by combining the first character
 * with the first combining mark following it. Returns a String
 * that is the composition of the leading character with its first
 * combining mark followed by the remaining combining marks. Returns
 * null if the first two characters cannot be further composed.
 */
private String composeOneStep(String input) {
    // Length in chars of the leading two characters, as reported by countChars.
    final int headLength = countChars(input, 0, 2);
    final String head = input.substring(0, headLength);
    final String composed = Normalizer.normalize(head, Normalizer.Form.NFC);

    // NFC left the pair untouched: no composition is possible.
    if (composed.equals(head)) {
        return null;
    }
    return composed + input.substring(headLength);
}
项目:OpenJSharp
文件:NormalizerBase.java
/**
 * Normalizes a <code>String</code> using the given normalization form.
 *
 * <p>A string whose characters are all at most 127 is returned unchanged for
 * every supported form.
 *
 * @param str the input string to be normalized.
 * @param form the normalization form
 * @param options the optional features to be enabled.
 * @return the normalized string
 * @throws IllegalArgumentException if no case matches {@code form}
 */
public static String normalize(String str, Normalizer.Form form, int options) {
    final int len = str.length();
    boolean asciiOnly = true;
    if (len < 80) {
        // Short input: scan in place with charAt.
        for (int i = 0; asciiOnly && i < len; i++) {
            asciiOnly = str.charAt(i) <= 127;
        }
    } else {
        // Longer input: scan a char[] copy.
        for (char ch : str.toCharArray()) {
            if (ch > 127) {
                asciiOnly = false;
                break;
            }
        }
    }

    switch (form) {
    case NFC :
        if (asciiOnly) {
            return str;
        }
        return NFC.normalize(str, options);
    case NFD :
        if (asciiOnly) {
            return str;
        }
        return NFD.normalize(str, options);
    case NFKC :
        if (asciiOnly) {
            return str;
        }
        return NFKC.normalize(str, options);
    case NFKD :
        if (asciiOnly) {
            return str;
        }
        return NFKD.normalize(str, options);
    }

    throw new IllegalArgumentException("Unexpected normalization form: " +
                                       form);
}
项目:guereza
文件:SimpleIndexer.java
/**
 * Splits a sentence into index words.
 *
 * <p>Each token produced by splitting on {@code REGEX_SPACE} is lower-cased,
 * NFD-normalized, has its {@code REGEX_ALPHANUM} matches removed and is passed
 * to {@code stemmed}. Empty results and stop words are dropped.
 *
 * @param sentence the sentence to split
 * @return a stream of the remaining words
 */
private Stream<String> getWords(final String sentence) {
    return Stream.of(sentence.split(REGEX_SPACE))
            .map(token -> Normalizer.normalize(token.toLowerCase(), Normalizer.Form.NFD)
                    .replaceAll(REGEX_ALPHANUM, ""))
            .map(token -> stemmed(token))
            .filter(word -> !(word.isEmpty() || StopWords.match(word)));
}
项目:sunbird-utils
文件:Slug.java
/**
 * Builds a lower-case slug from the input.
 *
 * <p>Steps, in order: trim, URL-decode, optionally transliterate, replace
 * {@code WHITESPACE} matches with dashes, NFD-normalize, delete {@code NONLATIN}
 * matches, normalize dashes, validate, lower-case with {@link Locale#ENGLISH}.
 *
 * @param input the text to convert, may be null
 * @param transliterate whether to transliterate the input first
 * @return the slug, or null when {@code input} is null
 */
public static String makeSlug(String input, boolean transliterate) {
    // Validate the input
    if (input == null) {
        ProjectLogger.log("Provided input value is null");
        return null;
    }
    final String origInput = input;

    // Remove extra spaces, then remove URL encoding
    String slug = urlDecode(input.trim());
    // Transliterate if required
    if (transliterate) {
        slug = transliterate(slug);
    }
    // Replace all whitespace with dashes
    slug = WHITESPACE.matcher(slug).replaceAll("-");
    // Decompose accented characters so the marks are removed by the next step
    slug = Normalizer.normalize(slug, Form.NFD);
    // Remove all non-latin special characters
    slug = NONLATIN.matcher(slug).replaceAll("");
    // Remove any consecutive dashes
    slug = normalizeDashes(slug);
    // Validate before returning
    validateResult(slug, origInput);
    // Slug is always lowercase
    return slug.toLowerCase(Locale.ENGLISH);
}
项目:Java_CTe
文件:XmlUtil.java
/**
 * Removes accents and line-level whitespace from an XML string.
 *
 * <p>Carriage returns, tabs and newlines are deleted, {@code &} becomes
 * {@code E}, whitespace between a closing {@code >} and an opening {@code <}
 * is removed, and finally the text is NFKD-normalized and stripped of
 * combining diacritical marks.
 *
 * @param str the text to clean, may be null
 * @return the cleaned text; the empty string when {@code str} is null
 */
public static String removeAcentos(String str) {
    // Guard first: the replacements below dereference str, so the null check must precede them.
    if (str == null) {
        return "";
    }
    final String cleaned = str.replace("\r", "")
            .replace("\t", "")
            .replace("\n", "")
            .replace("&", "E")
            .replaceAll(">\\s+<", "><");
    return Normalizer.normalize(cleaned, Normalizer.Form.NFKD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}
项目:jdk8u-jdk
文件:Pattern.java
/**
 * Rewrites the pattern so that it matches canonical equivalences.
 *
 * The pattern is converted to NFD (canonical decomposition) form; each code
 * point that is followed by non-spacing marks is then replaced by a
 * non-capturing group built by produceEquivalentAlternation. The result
 * is stored back into normalizedPattern.
 */
private void normalize() {
// NOTE(review): inCharClass is assigned here but never read in this method.
boolean inCharClass = false;
// Code point handled by the previous iteration; -1 until the first one is done.
int lastCodePoint = -1;
// Convert pattern into NFD form
normalizedPattern = Normalizer.normalize(pattern, Normalizer.Form.NFD);
patternLength = normalizedPattern.length();
// Modify pattern to match canonical equivalences
StringBuilder newPattern = new StringBuilder(patternLength);
// i is advanced inside the body, by code point width rather than by char.
for(int i=0; i<patternLength; ) {
int c = normalizedPattern.codePointAt(i);
StringBuilder sequenceBuffer;
// A non-spacing mark that has a predecessor starts a combining sequence.
if ((Character.getType(c) == Character.NON_SPACING_MARK)
&& (lastCodePoint != -1)) {
// The sequence begins with the preceding code point and this mark.
sequenceBuffer = new StringBuilder();
sequenceBuffer.appendCodePoint(lastCodePoint);
sequenceBuffer.appendCodePoint(c);
// Consume the following code points while the current one is a mark.
// Note that the code point ending the run is appended to the buffer
// before the loop condition rejects it.
while(Character.getType(c) == Character.NON_SPACING_MARK) {
i += Character.charCount(c);
if (i >= patternLength)
break;
c = normalizedPattern.codePointAt(i);
sequenceBuffer.appendCodePoint(c);
}
String ea = produceEquivalentAlternation(
sequenceBuffer.toString());
// Cut as many chars from the end of the output as lastCodePoint occupies
// (presumably the base character appended by the previous iteration),
// then emit the alternation as a non-capturing group in its place.
newPattern.setLength(newPattern.length()-Character.charCount(lastCodePoint));
newPattern.append("(?:").append(ea).append(")");
} else if (c == '[' && lastCodePoint != '\\') {
// '[' not preceded by a backslash: normalizeCharClass writes into
// newPattern and returns the index to continue from.
i = normalizeCharClass(newPattern, i);
} else {
// Any other code point is copied through unchanged.
newPattern.appendCodePoint(c);
}
// Common tail for all branches: remember c and step past it.
lastCodePoint = c;
i += Character.charCount(c);
}
normalizedPattern = newPattern.toString();
}
项目:jdk8u-jdk
文件:Pattern.java
/**
 * Attempts to compose input by combining the first character
 * with the first combining mark following it. Returns a String
 * that is the composition of the leading character with its first
 * combining mark followed by the remaining combining marks. Returns
 * null if the first two characters cannot be further composed.
 */
private String composeOneStep(String input) {
    // Length in chars of the leading two characters, as reported by countChars.
    final int headLength = countChars(input, 0, 2);
    final String head = input.substring(0, headLength);
    final String composed = Normalizer.normalize(head, Normalizer.Form.NFC);

    // NFC left the pair untouched: no composition is possible.
    if (composed.equals(head)) {
        return null;
    }
    return composed + input.substring(headLength);
}
项目:jdk8u-jdk
文件:NormalizerBase.java
/**
 * Normalizes a <code>String</code> using the given normalization form.
 *
 * <p>A string whose characters are all at most 127 is returned unchanged for
 * every supported form.
 *
 * @param str the input string to be normalized.
 * @param form the normalization form
 * @param options the optional features to be enabled.
 * @return the normalized string
 * @throws IllegalArgumentException if no case matches {@code form}
 */
public static String normalize(String str, Normalizer.Form form, int options) {
    final int len = str.length();
    boolean asciiOnly = true;
    if (len < 80) {
        // Short input: scan in place with charAt.
        for (int i = 0; asciiOnly && i < len; i++) {
            asciiOnly = str.charAt(i) <= 127;
        }
    } else {
        // Longer input: scan a char[] copy.
        for (char ch : str.toCharArray()) {
            if (ch > 127) {
                asciiOnly = false;
                break;
            }
        }
    }

    switch (form) {
    case NFC :
        if (asciiOnly) {
            return str;
        }
        return NFC.normalize(str, options);
    case NFD :
        if (asciiOnly) {
            return str;
        }
        return NFD.normalize(str, options);
    case NFKC :
        if (asciiOnly) {
            return str;
        }
        return NFKC.normalize(str, options);
    case NFKD :
        if (asciiOnly) {
            return str;
        }
        return NFKD.normalize(str, options);
    }

    throw new IllegalArgumentException("Unexpected normalization form: " +
                                       form);
}
项目:BIP39
文件:WordListHashing.java
/**
 * Wraps a word list so that every word it returns is NFKD-normalized.
 * The space character is taken from the wrapped list unchanged.
 *
 * @param wordList the word list to wrap
 * @return a view of {@code wordList} with NFKD-normalized words
 */
private static WordList normalizeNFKD(WordList wordList) {
    final WordList delegate = wordList;
    return new WordList() {
        @Override
        public char getSpace() {
            return delegate.getSpace();
        }

        @Override
        public String getWord(int index) {
            final String raw = delegate.getWord(index);
            return Normalizer.normalize(raw, Normalizer.Form.NFKD);
        }
    };
}