Java 类java.text.Normalizer.Form 实例源码
项目:solo-spring
文件:URICoder.java
/**
 * Encodes a string for use in a URI, working on its UTF-8 byte representation.
 *
 * <p>The string is brought into normalization form NFKC (unless it already is),
 * converted to bytes with the {@code UTF8} charset, and then each byte is either
 * copied as a character (when {@code isUnreserved} accepts it or it equals
 * {@code e}) or handed to {@code appendEscape}.
 *
 * @param s
 *            The string to encode; it may contain non ASCII characters
 * @param e
 *            A character that does not require encoding if found in the
 *            string.
 * @return the encoded string
 */
private static String encode_UTF8(String s, char e) {
    // TODO: Normalizer requires Java 6!
    String normalized = s;
    if (!Normalizer.isNormalized(s, Form.NFKC)) {
        normalized = Normalizer.normalize(s, Form.NFKC);
    }
    // String -> UTF-8 bytes
    ByteBuffer utf8Bytes = UTF8.encode(normalized);
    // bytes -> URI-encoded text
    StringBuffer encoded = new StringBuffer();
    while (utf8Bytes.hasRemaining()) {
        // mask to read the byte as an unsigned value (0..255)
        int octet = utf8Bytes.get() & 0xff;
        boolean keepLiteral = isUnreserved(octet) || octet == e;
        if (keepLiteral) {
            encoded.append((char) octet);
        } else {
            appendEscape(encoded, (byte) octet);
        }
    }
    return encoded.toString();
}
项目:solo-spring
文件:URICoder.java
/**
 * Encodes a string for use in a URI, escaping only the bytes that
 * {@code isLegal} rejects.
 *
 * <p>The string is normalized to NFKC when needed and converted to bytes with
 * the {@code UTF8} charset before the per-byte check is applied.
 *
 * @param s
 *            The string to encode; it may contain non ASCII characters
 * @return the encoded string
 */
private static String minimalEncode_UTF8(String s) {
    // TODO: Normalizer requires Java 6!
    String normalized = s;
    if (!Normalizer.isNormalized(s, Form.NFKC)) {
        normalized = Normalizer.normalize(s, Form.NFKC);
    }
    // String -> UTF-8 bytes
    ByteBuffer utf8Bytes = UTF8.encode(normalized);
    // bytes -> URI-encoded text
    StringBuffer encoded = new StringBuffer();
    while (utf8Bytes.hasRemaining()) {
        // mask to read the byte as an unsigned value (0..255)
        int octet = utf8Bytes.get() & 0xff;
        if (!isLegal(octet)) {
            appendEscape(encoded, (byte) octet);
        } else {
            encoded.append((char) octet);
        }
    }
    return encoded.toString();
}
项目:OpenJSharp
文件:CDataTransferer.java
/**
 * Translates raw clipboard/drag bytes into an object of the requested flavor.
 *
 * <p>{@code CF_URL} data requested as a {@link URL} is decoded with the charset
 * announced by the transferable (falling back to the default text charset) and
 * parsed as a URL. {@code CF_STRING} data is re-encoded as NFC-normalized UTF-8
 * before the superclass performs the actual translation.
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor,
                             long format, Transferable transferable) throws IOException {
    if (format == CF_URL && URL.class.equals(flavor.getRepresentationClass())) {
        String charsetName = getDefaultTextCharset();
        boolean announcesEncoding = transferable != null
                && transferable.isDataFlavorSupported(javaTextEncodingFlavor);
        if (announcesEncoding) {
            try {
                byte[] rawName = (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                charsetName = new String(rawName, "UTF-8");
            } catch (UnsupportedFlavorException cannotHappen) {
                // support for the flavor was checked just above; keep the default charset
            }
        }
        String urlText = new String(bytes, charsetName);
        return new URL(urlText);
    }

    byte[] payload = bytes;
    if (format == CF_STRING) {
        String composed = Normalizer.normalize(new String(bytes, "UTF8"), Form.NFC);
        payload = composed.getBytes("UTF8");
    }
    return super.translateBytes(payload, flavor, format, transferable);
}
项目:bibliome-java-utils
文件:Strings.java
/**
 * Strips diacritics from a string.
 *
 * <p>The string is decomposed (NFD) and every character belonging to the
 * Unicode blocks "Combining Diacritical Marks" or "Combining Diacritical Marks
 * Supplement" is dropped. When nothing is dropped, the decomposed string
 * itself is returned.
 *
 * @param s the string to process
 * @return the NFD form of the specified string with diacritics removed
 */
public static final String removeDiacritics(String s) {
    final String decomposed = Normalizer.normalize(s, Form.NFD);
    final int length = decomposed.length();
    // allocated lazily, on the first diacritic found
    StringBuilder kept = null;
    int pos = 0;
    while (pos < length) {
        final char current = decomposed.charAt(pos);
        final UnicodeBlock block = UnicodeBlock.of(current);
        final boolean diacritic = UnicodeBlock.COMBINING_DIACRITICAL_MARKS.equals(block)
                || UnicodeBlock.COMBINING_DIACRITICAL_MARKS_SUPPLEMENT.equals(block);
        if (!diacritic) {
            if (kept != null) {
                kept.append(current);
            }
        } else if (kept == null) {
            // first diacritic: start the copy with everything seen so far
            kept = new StringBuilder(length).append(decomposed, 0, pos);
        }
        pos++;
    }
    return (kept == null) ? decomposed : kept.toString();
}
项目:mycore
文件:MCRUtils.java
/**
 * Computes an iterated, optionally salted digest of a text and returns it via
 * {@code toHexString}.
 *
 * <p>The text is NFC-normalized and encoded as UTF-8. The first round digests
 * salt (if any) plus text; each further round digests the previous result.
 *
 * @param iterations total number of digest rounds; values below 1 behave like 1
 * @param salt bytes fed to the digest before the text, or {@code null} for none
 * @param text the text to hash
 * @param algorithm name passed to {@link MessageDigest#getInstance(String)}
 * @throws NoSuchAlgorithmException if the algorithm is not available
 */
private static String getHash(int iterations, byte[] salt, String text, String algorithm)
    throws NoSuchAlgorithmException {
    // one round is performed explicitly below, the loop only adds the remaining ones
    int extraRounds = iterations - 1;
    if (extraRounds < 0) {
        extraRounds = 0;
    }
    byte[] hash;
    try {
        MessageDigest md = MessageDigest.getInstance(algorithm);
        String normalized = Normalizer.normalize(text, Form.NFC);
        if (salt != null) {
            md.update(salt);
        }
        hash = md.digest(normalized.getBytes("UTF-8"));
        int round = 0;
        while (round < extraRounds) {
            hash = md.digest(hash);
            round++;
        }
    } catch (UnsupportedEncodingException e) {
        throw new MCRException("Could not get " + algorithm + " checksum", e);
    }
    return toHexString(hash);
}
项目:testarea-pdfbox2
文件:TextSection.java
/**
 * Renders a list of words as one string: the Unicode text of every text
 * position of a word is concatenated, words are separated by a single space,
 * and the result is normalized to NFKC.
 */
String toString(List<List<TextPosition>> words)
{
    StringBuilder text = new StringBuilder();
    // empty before the first word, a blank before every following one
    String separator = "";
    for (List<TextPosition> word : words)
    {
        text.append(separator);
        separator = " ";
        for (TextPosition position : word)
        {
            text.append(position.getUnicode());
        }
    }
    // cf. http://stackoverflow.com/a/7171932/1729265
    return Normalizer.normalize(text, Form.NFKC);
}
项目:MyVidCoRe
文件:Hash.java
/**
 * Computes an iterated, optionally salted digest of a string and returns it
 * via {@code Hash.toHexString}.
 *
 * <p>The string is NFC-normalized and encoded as UTF-8. The first round digests
 * salt (if any) plus text; each further round digests the previous result.
 *
 * @param iterations total number of digest rounds; values below 1 behave like 1
 * @param salt bytes fed to the digest before the text, or {@code null} for none
 * @param str the string to hash
 * @param algorithm name passed to {@link MessageDigest#getInstance(String)}
 * @throws NoSuchAlgorithmException if the algorithm is not available
 * @throws UnsupportedEncodingException if UTF-8 is not supported
 */
private static String getHash(int iterations, byte[] salt, String str, String algorithm)
    throws NoSuchAlgorithmException, UnsupportedEncodingException {
    // one round is performed explicitly below, the loop only adds the remaining ones
    int extraRounds = iterations - 1;
    if (extraRounds < 0) {
        extraRounds = 0;
    }
    final MessageDigest md = MessageDigest.getInstance(algorithm);
    final String normalized = Normalizer.normalize(str, Form.NFC);
    if (salt != null) {
        md.update(salt);
    }
    byte[] hash = md.digest(normalized.getBytes("UTF-8"));
    for (int remaining = extraRounds; remaining > 0; remaining--) {
        hash = md.digest(hash);
    }
    return Hash.toHexString(hash);
}
项目:packagedrone
文件:Users.java
/**
 * Hashes data with a salt and returns the result Base64 encoded.
 *
 * <p>The data is NFC-normalized; salt and data are encoded as UTF-8. The
 * digest of {@code salt + data} is computed first, followed by 1000 rounds of
 * {@code digest(salt + previousDigest)}. The digest implementation comes from
 * {@code createDigest()}.
 *
 * @param salt the salt, must not be {@code null}
 * @param data the data to hash
 * @return the Base64 encoded final digest
 */
public static String hashIt ( final String salt, String data )
{
    final byte[] saltBytes = salt.getBytes ( StandardCharsets.UTF_8 );
    final byte[] dataBytes = Normalizer.normalize ( data, Form.NFC ).getBytes ( StandardCharsets.UTF_8 );

    // initial round input: salt followed by data
    final byte[] seed = new byte[saltBytes.length + dataBytes.length];
    System.arraycopy ( saltBytes, 0, seed, 0, saltBytes.length );
    System.arraycopy ( dataBytes, 0, seed, saltBytes.length, dataBytes.length );

    final MessageDigest md = createDigest ();
    byte[] hash = md.digest ( seed );

    // follow-up round input: salt followed by the previous digest
    final byte[] buffer = new byte[saltBytes.length + hash.length];
    int round = 0;
    while ( round < 1000 )
    {
        System.arraycopy ( saltBytes, 0, buffer, 0, saltBytes.length );
        System.arraycopy ( hash, 0, buffer, saltBytes.length, hash.length );
        hash = md.digest ( buffer );
        round++;
    }
    return Base64.getEncoder ().encodeToString ( hash );
}
项目:isetools
文件:AccentCharNode.java
/**
 * Expands this accent node into an "ACCENT" fragment.
 *
 * <p>The first character of the inner text is looked up in {@code charMap} to
 * obtain the accent string; the second character of the inner text is the one
 * the accent is attached to. Both are concatenated (character first, accent
 * second) and NFC-normalized. When the lookup fails, U+FFFD is used as the
 * accent and a "char.accent.unknown" message is logged.
 *
 * NOTE(review): the inner text is indexed at positions 0 and 1 without a
 * length check - presumably the parser guarantees at least two characters;
 * confirm against the caller.
 */
@Override
public Fragment expanded() {
char[] cs = super.innerText().toCharArray();
// the accent is keyed by the first character of the inner text
String accent = charMap.get(innerText().substring(0, 1));
if (accent == null) {
// unknown accent code: fall back to the replacement character and report it
accent = "\uFFFD";
Message m = Message.builder("char.accent.unknown")
.fromNode(this)
.addNote("Character " + text + " cannot be expanded.")
.build();
Log.getInstance().add(m);
}
// character first, accent second, then let NFC combine them where possible
String str = "" + cs[1] + accent;
str = Normalizer.normalize(str, Form.NFC);
return wrap("ACCENT", str);
}
项目:berlioz
文件:URICoder.java
/**
 * Encodes a string for use in a URI, working on its UTF-8 byte representation.
 *
 * <p>The string is normalized to NFKC when needed and encoded as UTF-8. Each
 * byte is copied as a character when {@code isUnreserved} accepts it or it
 * equals {@code e}; every other byte is handed to {@code appendEscape}.
 *
 * @param s The string to encode; it may contain non ASCII characters
 * @param e A character that does not require encoding if found in the string.
 * @return the encoded string
 */
private static String encodeUTF8(String s, char e) {
    final boolean alreadyNormalized = Normalizer.isNormalized(s, Form.NFKC);
    final String normalized = alreadyNormalized ? s : Normalizer.normalize(s, Form.NFKC);
    final ByteBuffer utf8Bytes = StandardCharsets.UTF_8.encode(normalized);
    final StringBuilder encoded = new StringBuilder();
    while (utf8Bytes.hasRemaining()) {
        // mask to read the byte as an unsigned value (0..255)
        final int octet = utf8Bytes.get() & 0xff;
        if (!isUnreserved(octet) && octet != e) {
            appendEscape(encoded, (byte) octet);
        } else {
            encoded.append((char) octet);
        }
    }
    return encoded.toString();
}
项目:berlioz
文件:URICoder.java
/**
 * Encodes a string for use in a URI, escaping only the bytes that
 * {@code isLegal} rejects.
 *
 * <p>The string is normalized to NFKC when needed and encoded as UTF-8 before
 * the per-byte check is applied.
 *
 * @param s The string to encode; it may contain non ASCII characters
 * @return the encoded string
 */
private static String minimalEncodeUTF8(String s) {
    final boolean alreadyNormalized = Normalizer.isNormalized(s, Form.NFKC);
    final String normalized = alreadyNormalized ? s : Normalizer.normalize(s, Form.NFKC);
    final ByteBuffer utf8Bytes = StandardCharsets.UTF_8.encode(normalized);
    final StringBuilder encoded = new StringBuilder();
    while (utf8Bytes.hasRemaining()) {
        // mask to read the byte as an unsigned value (0..255)
        final int octet = utf8Bytes.get() & 0xff;
        if (!isLegal(octet)) {
            appendEscape(encoded, (byte) octet);
        } else {
            encoded.append((char) octet);
        }
    }
    return encoded.toString();
}
项目:infobip-open-jdk-8
文件:CDataTransferer.java
/**
 * Translates raw clipboard/drag bytes into an object of the requested flavor.
 *
 * <p>For {@code CF_URL} data requested as a {@link URL}, the bytes are decoded
 * with the charset named by the transferable's text-encoding flavor (or the
 * default text charset) and parsed as a URL. For {@code CF_STRING} data the
 * bytes are rewritten as NFC-normalized UTF-8; everything else is passed to
 * the superclass untouched.
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor,
                             long format, Transferable transferable) throws IOException {
    if (format == CF_URL && URL.class.equals(flavor.getRepresentationClass())) {
        return new URL(new String(bytes, resolveUrlCharset(transferable)));
    }
    if (format != CF_STRING) {
        return super.translateBytes(bytes, flavor, format, transferable);
    }
    String composed = Normalizer.normalize(new String(bytes, "UTF8"), Form.NFC);
    return super.translateBytes(composed.getBytes("UTF8"), flavor, format, transferable);
}

/** Returns the charset name announced by the transferable, or the default text charset. */
private String resolveUrlCharset(Transferable transferable) throws IOException {
    String charsetName = getDefaultTextCharset();
    if (transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor)) {
        try {
            charsetName = new String((byte[]) transferable.getTransferData(javaTextEncodingFlavor), "UTF-8");
        } catch (UnsupportedFlavorException cannotHappen) {
            // support for the flavor was checked just above; keep the default charset
        }
    }
    return charsetName;
}
项目:jdk8u-dev-jdk
文件:CDataTransferer.java
/**
 * Translates raw clipboard/drag bytes into an object of the requested flavor.
 *
 * <p>{@code CF_URL} bytes requested as a {@link URL} are decoded (using the
 * charset announced through the text-encoding flavor when available, else the
 * default text charset) and returned as a URL. {@code CF_STRING} bytes are
 * converted to NFC-normalized UTF-8 before delegating to the superclass.
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor,
                             long format, Transferable transferable) throws IOException {
    final boolean urlRequested = format == CF_URL
            && URL.class.equals(flavor.getRepresentationClass());
    if (urlRequested) {
        String encoding = getDefaultTextCharset();
        if (transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor)) {
            try {
                final Object announced = transferable.getTransferData(javaTextEncodingFlavor);
                encoding = new String((byte[]) announced, "UTF-8");
            } catch (UnsupportedFlavorException cannotHappen) {
                // support for the flavor was checked just above; keep the default charset
            }
        }
        return new URL(new String(bytes, encoding));
    }

    byte[] translated = bytes;
    if (format == CF_STRING) {
        final String decoded = new String(bytes, "UTF8");
        translated = Normalizer.normalize(decoded, Form.NFC).getBytes("UTF8");
    }
    return super.translateBytes(translated, flavor, format, transferable);
}
项目:furi
文件:URICoder.java
/**
 * Encodes a string for use in a URI, working on its UTF-8 byte representation.
 *
 * <p>After NFKC normalization and conversion to bytes with the {@code UTF8}
 * charset, a byte is written as a plain character when {@code isUnreserved}
 * accepts it or when it equals {@code e}; all other bytes go through
 * {@code appendEscape}.
 *
 * @param s The string to encode; it may contain non ASCII characters
 * @param e A character that does not require encoding if found in the string.
 * @return the encoded string
 */
private static String encode_UTF8(String s, char e) {
    // TODO: Normalizer requires Java 6!
    final String form = Normalizer.isNormalized(s, Form.NFKC)
            ? s
            : Normalizer.normalize(s, Form.NFKC);
    final ByteBuffer bytes = UTF8.encode(form);
    final StringBuffer out = new StringBuffer();
    // each get() consumes exactly one byte, so counting down visits every byte once
    for (int left = bytes.remaining(); left > 0; left--) {
        final int value = bytes.get() & 0xff;
        if (!isUnreserved(value) && value != e) {
            appendEscape(out, (byte) value);
        } else {
            out.append((char) value);
        }
    }
    return out.toString();
}
项目:furi
文件:URICoder.java
/**
 * Encodes a string for use in a URI, escaping only the bytes that
 * {@code isLegal} rejects.
 *
 * <p>The string is normalized to NFKC when needed and converted to bytes with
 * the {@code UTF8} charset before the per-byte check is applied.
 *
 * @param s The string to encode; it may contain non ASCII characters
 * @return the encoded string
 */
private static String minimalEncode_UTF8(String s) {
    // TODO: Normalizer requires Java 6!
    final String form = Normalizer.isNormalized(s, Form.NFKC)
            ? s
            : Normalizer.normalize(s, Form.NFKC);
    final ByteBuffer bytes = UTF8.encode(form);
    final StringBuffer out = new StringBuffer();
    // each get() consumes exactly one byte, so counting down visits every byte once
    for (int left = bytes.remaining(); left > 0; left--) {
        final int value = bytes.get() & 0xff;
        if (isLegal(value)) {
            out.append((char) value);
            continue;
        }
        appendEscape(out, (byte) value);
    }
    return out.toString();
}
项目:voj
文件:SlugifyUtils.java
/**
 * Builds the slug of a string.
 *
 * <p>The string is trimmed, every match of the {@code WHITESPACE} pattern is
 * replaced by a dash, the result is URL-encoded as UTF-8, NFD-normalized and
 * finally lower-cased independently of the default locale.
 *
 * @param str - the string to build the slug for
 * @return the slug of the string, or an empty string when {@code str} is {@code null}
 */
public static String getSlug(String str) {
    if ( str == null ) {
        return "";
    }

    // Rid of White Spaces
    String slug = WHITESPACE.matcher(str.trim()).replaceAll("-");
    // Processing Non-ASCII Characters
    try {
        slug = URLEncoder.encode(slug, "UTF-8");
    } catch (UnsupportedEncodingException ignored) {
        // Deliberately ignored: UTF-8 is a charset every Java platform supports,
        // so this cannot happen; the un-encoded value is kept as a best effort.
    }
    // Slugify String
    String normalized = Normalizer.normalize(slug, Form.NFD);
    // Locale.ROOT keeps the result stable; with the default locale a Turkish
    // environment would turn 'I' into the dotless, non-ASCII 'ı'.
    return normalized.toLowerCase(java.util.Locale.ROOT);
}
项目:package-drone
文件:Users.java
/**
 * Produces a salted, iterated hash of the data, Base64 encoded.
 *
 * <p>Salt and NFC-normalized data are encoded as UTF-8. The first digest is
 * taken over {@code salt + data}; it is then re-hashed 1000 times, each time
 * over {@code salt + previousDigest}. The digest implementation comes from
 * {@code createDigest()}.
 *
 * @param salt the salt, must not be {@code null}
 * @param data the data to hash
 * @return the Base64 encoded final digest
 */
public static String hashIt ( final String salt, String data )
{
    final String normalized = Normalizer.normalize ( data, Form.NFC );
    final byte[] payload = normalized.getBytes ( StandardCharsets.UTF_8 );
    final byte[] prefix = salt.getBytes ( StandardCharsets.UTF_8 );
    final int prefixLength = prefix.length;

    // first round: salt followed by the payload
    final byte[] initial = new byte[prefixLength + payload.length];
    System.arraycopy ( prefix, 0, initial, 0, prefixLength );
    System.arraycopy ( payload, 0, initial, prefixLength, payload.length );

    final MessageDigest md = createDigest ();
    byte[] result = md.digest ( initial );

    // later rounds: salt followed by the previous digest
    final byte[] scratch = new byte[prefixLength + result.length];
    for ( int remaining = 1000; remaining > 0; remaining-- )
    {
        System.arraycopy ( prefix, 0, scratch, 0, prefixLength );
        System.arraycopy ( result, 0, scratch, prefixLength, result.length );
        result = md.digest ( scratch );
    }
    return Base64.getEncoder ().encodeToString ( result );
}
项目:srimporter
文件:SheetSerializer.java
/**
 * Removes accents from a string when accent normalization is switched on.
 *
 * <p>When {@code g_bNormalize} is false the input is returned unchanged.
 * Otherwise the string is decomposed (NFD) and every character of type
 * {@link Character#NON_SPACING_MARK} is dropped.
 *
 * <p>The previous implementation called {@code StringBuilder.delete(i, 1)},
 * whose second argument is an end index rather than a length: it threw a
 * {@link StringIndexOutOfBoundsException} for marks at an index above 1 and
 * removed nothing for a mark at index 1. It also skipped the character
 * following a removed one. Copying the characters to keep avoids both problems.
 *
 * @param regularString the string to process
 * @return the string without non-spacing marks, or the input itself when
 *         normalization is disabled
 */
static String NormalizeAccents(String regularString) {
    if (!g_bNormalize) {
        return regularString; // leave the accents
    }
    String normalizedString = regularString.replace("é", "e");
    normalizedString = Normalizer.normalize(normalizedString, Form.NFD);
    StringBuilder sb = new StringBuilder(normalizedString.length());
    for (int i = 0; i < normalizedString.length(); i++) {
        char c = normalizedString.charAt(i);
        // keep everything except the combining marks produced by the decomposition
        if (Character.getType(c) != Character.NON_SPACING_MARK) {
            sb.append(c);
        }
    }
    return sb.toString();
}
项目:wikipedia_indexer
文件:AccentsDefault.java
/**
 * Replaces every token of the stream that contains combining diacritical
 * marks with its accent-free form.
 *
 * <p>Each token is decomposed (NFD) and all characters of the Unicode block
 * "Combining Diacritical Marks" are removed. Tokens that change are written
 * back via {@code previous()}, {@code set(...)} and {@code next()}.
 *
 * @param stream the stream to process; {@code null} is ignored
 */
public void apply(TokenStream stream) throws TokenizerException {
    if (stream == null) {
        return;
    }
    stream.reset();
    while (stream.hasNext()) {
        final String original = stream.next();
        final String decomposed = Normalizer.normalize(original, Form.NFD);
        final String stripped = decomposed.replaceAll("[\\p{InCombiningDiacriticalMarks}]", "");
        if (original.equals(stripped)) {
            continue;
        }
        // step back onto the token just read, replace it, then move past it again
        stream.previous();
        stream.set(stripped);
        stream.next();
    }
}
项目:devops-cstack
文件:AlphaNumericsCharactersCheckUtils.java
/**
 * Reduces a value to the characters {@code - _ a-z A-Z 0-9}.
 *
 * <p>Accents are removed first (NFD decomposition, then dropping the combining
 * diacritical marks); afterwards every character that is not a dash,
 * underscore, ASCII letter, ASCII digit or whitespace is removed, and finally
 * all blanks are removed. The value is logged at debug level before and after.
 *
 * @param value the value to convert
 * @return the converted value
 */
public static String convertToAlphaNumerics(String value) {
    logger.debug("Before : " + value);
    final String decomposed = Normalizer.normalize(value, Form.NFD);
    final String unaccented = decomposed.replaceAll("[\\p{InCombiningDiacriticalMarks}]", "");
    final String filtered = unaccented.replaceAll("[^-_a-zA-Z0-9\\s]", "");
    final String result = filtered.replace(" ", "");
    logger.debug("After : " + result);
    return result;
}
项目:devops-cstack
文件:AlphaNumericsCharactersCheckUtils.java
/**
 * Removes accents from a value: the value is decomposed (NFD) and every run
 * of combining diacritical marks is deleted. The value is logged at debug
 * level before and after.
 *
 * @param value the value to process
 * @return the value without combining diacritical marks
 */
public static String deAccent(String value) {
    logger.debug("Before : " + value);
    final String decomposed = Normalizer.normalize(value, Form.NFD);
    final Pattern diacritics = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    final String stripped = diacritics.matcher(decomposed).replaceAll("");
    logger.debug("After : " + stripped);
    return stripped;
}
项目:devops-cstack
文件:AlphaNumericsCharactersCheckUtils.java
/**
 * Reduces a value to ASCII letters and digits, falling back to a generated
 * default name when nothing is left.
 *
 * <p>The value is first re-interpreted: its ISO-8859-1 bytes are decoded as
 * UTF-8. It is then decomposed (NFD), all non-ASCII characters are removed,
 * then everything that is not an ASCII letter, digit or whitespace, and
 * finally all blanks.
 *
 * @param value the value to convert
 * @param countApp number appended to {@code "default"} for the fallback name
 * @return the converted value, or {@code "default" + countApp} when it is empty
 * @throws UnsupportedEncodingException if one of the charsets is not supported
 */
public static String convertToAlphaNumerics(String value, Integer countApp)
        throws UnsupportedEncodingException {
    final byte[] latin1Bytes = value.getBytes("ISO-8859-1");
    final String decoded = new String(latin1Bytes, "UTF-8");
    final String decomposed = Normalizer.normalize(decoded, Form.NFD);
    final String asciiOnly = decomposed.replaceAll("[^\\p{ASCII}]", "");
    final String lettersDigitsSpaces = asciiOnly.replaceAll("[^a-zA-Z0-9\\s]", "");
    final String compact = lettersDigitsSpaces.replace(" ", "");
    if (compact.equalsIgnoreCase("")) {
        return "default" + countApp;
    }
    return compact;
}
项目:sunbird-utils
文件:Slug.java
/**
 * Creates a slug from the given input.
 *
 * <p>Steps, in order: trim, {@code urlDecode}, optional {@code transliterate},
 * replace {@code WHITESPACE} matches with dashes, NFD-normalize, delete
 * {@code NONLATIN} matches, {@code normalizeDashes}, {@code validateResult}
 * against the original input, and lower-case with {@link Locale#ENGLISH}.
 *
 * @param input the text to turn into a slug
 * @param transliterate whether the input is transliterated before slugging
 * @return the slug, or {@code null} when the input is {@code null}
 */
public static String makeSlug(String input, boolean transliterate) {
    // Validate the input
    if (input == null) {
        ProjectLogger.log("Provided input value is null");
        return input;
    }
    final String original = input;

    // Remove extra spaces and URL encoding
    String slug = urlDecode(input.trim());
    // Transliterate when requested
    if (transliterate) {
        slug = transliterate(slug);
    }
    // Whitespace becomes dashes
    slug = WHITESPACE.matcher(slug).replaceAll("-");
    // Decompose accented characters so the marks can be removed below
    slug = Normalizer.normalize(slug, Form.NFD);
    // Remove all non-latin special characters
    slug = NONLATIN.matcher(slug).replaceAll("");
    // Remove any consecutive dashes
    slug = normalizeDashes(slug);
    // Validate before returning
    validateResult(slug, original);
    // Slug is always lowercase
    return slug.toLowerCase(Locale.ENGLISH);
}
项目:openjdk-jdk10
文件:Pattern.java
/**
 * Tries to compose the leading character of the input with the first
 * combining mark that follows it.
 *
 * @return the NFC composition of the first two characters followed by the
 *         untouched rest of the input, or {@code null} when NFC leaves the
 *         first two characters unchanged
 */
private static String composeOneStep(String input) {
    final int headLength = countChars(input, 0, 2);
    final String head = input.substring(0, headLength);
    final String composedHead = Normalizer.normalize(head, Normalizer.Form.NFC);
    if (composedHead.equals(head)) {
        return null;
    }
    final String tail = input.substring(headLength);
    return composedHead + tail;
}
项目:openjdk-jdk10
文件:Pattern.java
/**
 * Tries to match this node at index {@code i} of {@code seq}.
 *
 * The code first extends {@code j} from {@code i} over following code points
 * until {@code Grapheme.isBoundary} reports a boundary (or {@code matcher.to}
 * is reached). If that span is a single code point it is tested directly
 * against {@code predicate}. Otherwise the span {@code [i, j)} is
 * NFC-normalized and, when the result is exactly one code point accepted by
 * {@code predicate} and the {@code next} node matches at {@code j}, the match
 * succeeds; if not, {@code j} is moved back by one code point and the attempt
 * is repeated. {@code matcher.hitEnd} is set when no match was found and the
 * span reached {@code matcher.to} (or {@code i} was not below it).
 */
boolean match(Matcher matcher, int i, CharSequence seq) {
if (i < matcher.to) {
int ch0 = Character.codePointAt(seq, i);
// n = number of chars of the first code point
int n = Character.charCount(ch0);
int j = i + n;
// advance j until a boundary between consecutive code points is reported
while (j < matcher.to) {
int ch1 = Character.codePointAt(seq, j);
if (Grapheme.isBoundary(ch0, ch1))
break;
ch0 = ch1;
j += Character.charCount(ch1);
}
if (i + n == j) { // single, assume nfc cp
if (predicate.is(ch0))
return next.match(matcher, j, seq);
} else {
// several code points: try the longest span first, then shrink it
// from the end one code point at a time
while (i + n < j) {
String nfc = Normalizer.normalize(
seq.toString().substring(i, j), Normalizer.Form.NFC);
// only a span that composes into exactly one code point can match
if (nfc.codePointCount(0, nfc.length()) == 1) {
if (predicate.is(nfc.codePointAt(0)) &&
next.match(matcher, j, seq)) {
return true;
}
}
ch0 = Character.codePointBefore(seq, j);
j -= Character.charCount(ch0);
}
}
// input remains after the examined span, so the end was not hit
if (j < matcher.to)
return false;
}
matcher.hitEnd = true;
return false;
}
项目:openjdk-jdk10
文件:CDataTransferer.java
/**
 * Translates raw pasteboard bytes into an object of the requested flavor.
 *
 * Three cases are handled before delegating to the superclass: CF_URL data
 * requested as a URL is decoded and reduced to the URL via extractURL;
 * CF_FILE data requested with a URI-list flavor is converted into a
 * line-separated string and then treated as CF_STRING; plain CF_STRING data
 * is rewritten as NFC-normalized UTF-8.
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor,
long format, Transferable transferable) throws IOException {
if (format == CF_URL && URL.class.equals(flavor.getRepresentationClass())) {
// default to the platform charset unless the transferable announces another one
String charset = Charset.defaultCharset().name();
if (transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor)) {
try {
charset = new String((byte[]) transferable.getTransferData(javaTextEncodingFlavor), StandardCharsets.UTF_8);
} catch (UnsupportedFlavorException cannotHappen) {
// support for the flavor was checked just above; keep the default charset
}
}
String xml = new String(bytes, charset);
// The macOS pasteboard returns a property list that consists of one URL;
// extract it.
return new URL(extractURL(xml));
}
if(isUriListFlavor(flavor) && format == CF_FILE) {
// dragQueryFile works fine with files and URLs:
// it parses the property list and extracts the values.
// macOS always returns a property list for
// CF_URL and CF_FILE.
String[] strings = dragQueryFile(bytes);
if(strings == null) {
return null;
}
// NOTE(review): getBytes() without arguments uses the platform default charset
bytes = String.join(System.getProperty("line.separator"),
strings).getBytes();
// The URIs have been extracted from the XML; from here on the data is
// treated as a regular string so the base method can translate it to the
// target representation class.
format = CF_STRING;
} else if (format == CF_STRING) {
// re-encode plain strings as NFC-normalized UTF-8
bytes = Normalizer.normalize(new String(bytes, "UTF8"), Form.NFC).getBytes("UTF8");
}
return super.translateBytes(bytes, flavor, format, transferable);
}
项目:smarti
文件:StringUtils.java
/**
 * Provides the slug name for the given input: {@code WHITESPACE} matches are
 * replaced by dashes, the result is NFD-normalized, {@code NONLATIN} matches
 * are removed and the remainder is lower-cased with {@link Locale#ROOT}.
 *
 * @param input the text to turn into a slug
 * @return the slug
 */ //from https://stackoverflow.com/questions/1657193/java-code-library-for-generating-slugs-for-use-in-pretty-urls
public static String toSlug(String input) {
    final String dashed = WHITESPACE.matcher(input).replaceAll("-");
    final String decomposed = Normalizer.normalize(dashed, Form.NFD);
    final String latinOnly = NONLATIN.matcher(decomposed).replaceAll("");
    return latinOnly.toLowerCase(Locale.ROOT);
}
项目:openjdk9
文件:Pattern.java
/**
 * Attempts one composition step on the input: the first character is combined
 * with the first combining mark following it.
 *
 * @return the composed leading character followed by the remaining combining
 *         marks and the rest of the input, or {@code null} if NFC does not
 *         change the first two characters
 */
private static String composeOneStep(String input) {
    final int prefixEnd = countChars(input, 0, 2);
    final String prefix = input.substring(0, prefixEnd);
    final String composed = Normalizer.normalize(prefix, Normalizer.Form.NFC);
    return composed.equals(prefix)
            ? null
            : composed + input.substring(prefixEnd);
}
项目:openjdk9
文件:Pattern.java
/**
 * Tries to match this node at index {@code i} of {@code seq}.
 *
 * Starting at {@code i}, {@code j} is advanced over following code points
 * until {@code Grapheme.isBoundary} reports a boundary or {@code matcher.to}
 * is reached. A span of a single code point is tested directly against
 * {@code predicate}. A longer span is NFC-normalized; it matches when the
 * normalized form is exactly one code point accepted by {@code predicate} and
 * the {@code next} node matches at {@code j}. On failure the span is shortened
 * by its last code point and tried again. {@code matcher.hitEnd} is set when
 * no match was found and the span reached {@code matcher.to} (or {@code i}
 * was not below it).
 */
boolean match(Matcher matcher, int i, CharSequence seq) {
if (i < matcher.to) {
int ch0 = Character.codePointAt(seq, i);
// n = number of chars of the first code point
int n = Character.charCount(ch0);
int j = i + n;
// advance j until a boundary between consecutive code points is reported
while (j < matcher.to) {
int ch1 = Character.codePointAt(seq, j);
if (Grapheme.isBoundary(ch0, ch1))
break;
ch0 = ch1;
j += Character.charCount(ch1);
}
if (i + n == j) { // single, assume nfc cp
if (predicate.is(ch0))
return next.match(matcher, j, seq);
} else {
// several code points: try the longest span first, then shrink it
// from the end one code point at a time
while (i + n < j) {
String nfc = Normalizer.normalize(
seq.toString().substring(i, j), Normalizer.Form.NFC);
// only a span that composes into exactly one code point can match
if (nfc.codePointCount(0, nfc.length()) == 1) {
if (predicate.is(nfc.codePointAt(0)) &&
next.match(matcher, j, seq)) {
return true;
}
}
ch0 = Character.codePointBefore(seq, j);
j -= Character.charCount(ch0);
}
}
// input remains after the examined span, so the end was not hit
if (j < matcher.to)
return false;
}
matcher.hitEnd = true;
return false;
}
项目:mycore
文件:MCRTextNormalizer.java
/**
 * Normalizes text for comparison: lower-cases it, replaces hyphens by blanks,
 * removes accents, simplifies German umlaut transcriptions and sharp s,
 * replaces punctuation by blanks and collapses whitespace.
 *
 * The steps are order-dependent; each one works on the result of the previous.
 *
 * @param text the text to normalize
 * @return the normalized text
 */
public static String normalizeText(String text) {
// NOTE(review): lower-casing uses the default locale, so results can differ between environments
text = text.toLowerCase(Locale.getDefault());
text = new MCRHyphenNormalizer().normalize(text).replace("-", " ");
text = Normalizer.normalize(text, Form.NFD).replaceAll("\\p{M}", ""); //canonical decomposition, remove accents
text = text.replace("ue", "u").replace("oe", "o").replace("ae", "a").replace("ß", "s").replace("ss", "s");
// NOTE(review): as written, this pattern is the class [^a-z0-9] followed by one whitespace
// character and a literal ']', so it only removes such three-character sequences.
// "[^a-z0-9\\s]" was presumably intended - confirm before changing, since that would
// delete punctuation here instead of turning it into blanks two lines below.
text = text.replaceAll("[^a-z0-9]\\s]", ""); //remove all non-alphabetic characters
// text = text.replaceAll("\\b.{1,3}\\b", " ").trim(); // remove all words with fewer than four characters
text = text.replaceAll("\\p{Punct}", " ").trim(); // remove all punctuation
text = text.replaceAll("\\s+", " "); // normalize whitespace
return text;
}
项目:mycore
文件:MCRNameMerger.java
/**
 * Normalizes a name fragment for comparison: lower-cases it, replaces hyphens
 * by blanks, removes accents, simplifies German umlaut transcriptions and
 * sharp s, replaces punctuation by blanks, collapses whitespace and trims.
 *
 * The steps are order-dependent; each one works on the result of the previous.
 *
 * @param nameFragment the name fragment to normalize
 * @return the normalized name fragment
 */
private String normalize(String nameFragment) {
// NOTE(review): lower-casing uses the default locale, so results can differ between environments
String text = nameFragment.toLowerCase(Locale.getDefault());
text = new MCRHyphenNormalizer().normalize(text).replace("-", " ");
text = Normalizer.normalize(text, Form.NFD).replaceAll("\\p{M}", ""); // canonical decomposition, then remove accents
text = text.replace("ue", "u").replace("oe", "o").replace("ae", "a").replace("ß", "s").replace("ss", "s");
// NOTE(review): as written, this pattern is the class [^a-z0-9] followed by one whitespace
// character and a literal ']', so it only removes such three-character sequences.
// "[^a-z0-9\\s]" was presumably intended - confirm before changing, since that would
// delete punctuation here instead of turning it into blanks on the next line.
text = text.replaceAll("[^a-z0-9]\\s]", ""); //remove all non-alphabetic characters
text = text.replaceAll("\\p{Punct}", " ").trim(); // remove all punctuation
text = text.replaceAll("\\s+", " "); // normalize whitespace
return text.trim();
}
项目:eSDK_EC_SDK_Java
文件:StringUtils.java
/**
 * Tells whether a string consists of one or more digits.
 *
 * <p>The string is NFKC-normalized before the check, so compatibility forms
 * such as full-width digits are accepted as well.
 *
 * @param str the string to check, may be {@code null}
 * @return {@code true} if the normalized string matches {@code \d+};
 *         {@code false} otherwise, including for {@code null}
 */
public static boolean isNumber(String str)
{
    return null != str && Normalizer.normalize(str, Form.NFKC).matches("\\d+");
}
项目:eSDK_EC_SDK_Java
文件:StringUtils.java
/**
 * Checks whether a string is made up of digits only (at least one).
 *
 * <p>The check runs on the NFKC-normalized string, so compatibility forms such
 * as full-width digits count as digits.
 *
 * @param str the string to check, may be {@code null}
 * @return {@code true} if the normalized string matches {@code \d+};
 *         {@code false} otherwise, including for {@code null}
 */
public static boolean isNumber(String str)
{
    boolean numeric = false;
    if (null != str)
    {
        String compatible = Normalizer.normalize(str, Form.NFKC);
        numeric = compatible.matches("\\d+");
    }
    return numeric;
}
项目:eSDK_EC_SDK_Java
文件:StringUtils.java
/**
 * Determines whether a string contains nothing but digits (one or more).
 *
 * <p>NFKC normalization is applied first, which maps compatibility forms such
 * as full-width digits to their plain counterparts.
 *
 * @param str the string to check, may be {@code null}
 * @return {@code true} if the normalized string matches {@code \d+};
 *         {@code false} otherwise, including for {@code null}
 */
public static boolean isNumber(String str)
{
    final String compatible = (null == str) ? null : Normalizer.normalize(str, Form.NFKC);
    if (compatible == null)
    {
        return false;
    }
    return compatible.matches("\\d+");
}
项目:zest-writer
文件:ZdsHttp.java
/**
 * Transforms any string into a slug.
 *
 * <p>Matches of {@code Constant.WHITESPACE} become dashes, the result is
 * NFD-normalized, matches of {@code Constant.NONLATIN} are removed and the
 * remainder is lower-cased with {@link Locale#ENGLISH}.
 *
 * @param input string to convert into a slug
 * @return slug string
 */
public static String toSlug(String input) {
    final String dashed = Constant.WHITESPACE.matcher(input).replaceAll("-");
    final String decomposed = Normalizer.normalize(dashed, Form.NFD);
    final String latinOnly = Constant.NONLATIN.matcher(decomposed).replaceAll("");
    return latinOnly.toLowerCase(Locale.ENGLISH);
}
项目:engerek
文件:DiacriticsFilter.java
/**
 * Removes diacritics from the string value of a property value.
 *
 * <p>A non-empty string value is decomposed (NFD), all runs of combining
 * diacritical marks are deleted, and the result is stored back into the same
 * property value. Empty values are left untouched.
 *
 * @param propertyValue the value to filter, must not be {@code null}
 * @return the same property value instance
 */
@Override
public <T extends Object> PrismPropertyValue<T> apply(PrismPropertyValue<T> propertyValue) {
    Validate.notNull(propertyValue, "Node must not be null.");

    final String original = getStringValue(propertyValue);
    if (!StringUtils.isEmpty(original)) {
        final String decomposed = Normalizer.normalize(original, Form.NFD);
        final String stripped = decomposed.replaceAll(
                "\\p{InCombiningDiacriticalMarks}+", "");
        propertyValue.setValue((T) stripped);
    }
    return propertyValue;
}
项目:site
文件:PostEntity.java
/**
 * Returns the suggested slug, or derives one from the title when no usable
 * suggestion is given.
 *
 * <p>A derived slug is built by lower-casing the title (locale-independently,
 * so that e.g. a Turkish default locale does not turn 'I' into the dotless
 * 'ı', which would then be stripped as a non-word character), decomposing it
 * (NFD), removing combining diacritical marks and every character that is
 * neither a word character nor whitespace, collapsing whitespace/dash runs,
 * trimming, and replacing the remaining whitespace by dashes.
 *
 * @param suggestedSlug the slug to use; ignored when {@code null} or blank
 * @param newTitle the title to derive the slug from when no slug is suggested
 * @return the suggested slug, or the slug derived from the title
 */
final String generateSlug(final String suggestedSlug, final String newTitle) {
    String rv = suggestedSlug;
    if (rv == null || rv.trim().isEmpty()) {
        rv = Normalizer.normalize(newTitle.toLowerCase(java.util.Locale.ROOT), Form.NFD)
                .replaceAll("\\p{InCombiningDiacriticalMarks}|[^\\w\\s]", "")
                .replaceAll("[\\s-]+", " ")
                .trim()
                .replaceAll("\\s", "-");
    }
    return rv;
}
项目:EventManager-JEE
文件:Event.java
/**
 * Generates a slug from the input and appends a six-character random suffix.
 *
 * <p>Whitespace is replaced by dashes, the text is NFD-normalized, every
 * character that is neither a word character nor a dash is removed, and the
 * rest is lower-cased with {@link Locale#ENGLISH}. The suffix consists of the
 * first six characters of a 130-bit random number written in base 32, taken
 * from a {@link SecureRandom} created with the bytes of
 * {@code createdAt.toString()} as seed.
 *
 * @param input the text to turn into a slug
 * @param createdAt the date whose string form seeds the random generator
 * @return {@code slug + "-" + suffix}
 */
public static String generateSlug(String input, Date createdAt) {
    final Pattern nonLatin = Pattern.compile("[^\\w-]");
    final Pattern whitespace = Pattern.compile("[\\s]");
    final byte[] seed = createdAt.toString().getBytes();
    final SecureRandom random = new SecureRandom(seed);

    final String dashed = whitespace.matcher(input).replaceAll("-");
    final String decomposed = Normalizer.normalize(dashed, Form.NFD);
    final String base = nonLatin.matcher(decomposed).replaceAll("").toLowerCase(Locale.ENGLISH);

    final String suffix = new BigInteger(130, random).toString(32).substring(0, 6);
    return base + "-" + suffix;
}