public static Map<String, String> convertStringToActionProperties(String text) { PropertySplitter split = new PropertySplitter(text); String tok = split.nextPair(); Map<String,String> props = new LinkedHashMap<String,String>(); while (tok != null) { String[] prp = StringUtils.split(tok, "=", 2); //NOI18N if (prp.length >= 1 ) { String key = prp[0]; //in case the user adds -D by mistake, remove it to get a parsable xml file. if (key.startsWith("-D")) { //NOI18N key = key.substring("-D".length()); //NOI18N } if (key.startsWith("-")) { //NOI18N key = key.substring(1); } if (key.endsWith("=")) { key = key.substring(0, key.length() - 1); } if (key.trim().length() > 0 && Verifier.checkElementName(key.trim()) == null) { props.put(key.trim(), prp.length > 1 ? prp[1] : ""); } } tok = split.nextPair(); } return props; }
/**
 * Removes every character that is not a legal XML character from the given string.
 *
 * <p>The string is examined one Unicode code point at a time rather than one UTF-16 code
 * unit at a time. A well-formed surrogate pair therefore reaches
 * {@code Verifier.isXMLCharacter} as a single supplementary code point and is kept, while an
 * unpaired surrogate is still removed. The output buffer is only allocated once the first
 * illegal character is found, so a clean input is returned as the same instance.
 *
 * @param value the string to filter, may be {@code null}
 * @return {@code null} if {@code value} is {@code null}; {@code value} itself if it contains
 *         no illegal characters; otherwise a copy with the illegal characters removed
 */
@Nullable
public static String filterXMLCharacters(String value) {
    if (value == null) {
        return null;
    }
    final int length = value.length();
    StringBuilder builder = null;
    int i = 0;
    while (i < length) {
        final int codePoint = value.codePointAt(i);
        if (Verifier.isXMLCharacter(codePoint)) {
            if (builder != null) {
                builder.appendCodePoint(codePoint);
            }
        } else if (builder == null) {
            // First illegal character: start copying, beginning with the clean prefix.
            builder = new StringBuilder(length + 5);
            builder.append(value, 0, i);
        }
        // Advance by one or two chars depending on whether this was a surrogate pair.
        i += Character.charCount(codePoint);
    }
    return builder != null ? builder.toString() : value;
}
static private void addSanitizedContent( final Element e, final String val ) { try { e.addContent(val); } catch (final IllegalDataException ide) { LOGGER.warn( "Unable to add content", ide); // Unless a better idea can be found, we need to replace all // unparseable characters with a space as a placeholder final StringBuffer newVal = new StringBuffer(); for (int i = 0, len = val.length(); i < len; i++) { if (Verifier.isXMLCharacter(val.charAt(i))) { newVal.append(val.charAt(i)); } else { newVal.append(' '); } } e.addContent(newVal.toString()); } }
/** * Some characters are illegal in XML even as numerical character references. This method performs escaping of them * in a custom format, which is supposed to be unescaped on retrieving from XML using {@link #unescapeIllegalXmlChars(String)}. * Resulting text can be part of XML version 1.0 document. * * @see <a href="https://www.w3.org/International/questions/qa-controls">https://www.w3.org/International/questions/qa-controls</a> * @see Verifier#isXMLCharacter(int) */ @Nonnull public static String escapeIllegalXmlChars(@Nonnull String text) { StringBuilder b = null; int lastPos = 0; for (int i = 0; i < text.length(); i++) { int c = text.codePointAt(i); if (Character.isSupplementaryCodePoint(c)) { //noinspection AssignmentToForLoopParameter i++; } if (c == '#' || !Verifier.isXMLCharacter(c)) { if (b == null) b = new StringBuilder(text.length() + 5); // assuming there's one 'large' char (e.g. 0xFFFF) to escape numerically b.append(text, lastPos, i).append('#'); if (c != '#') b.append(Integer.toHexString(c)); b.append('#'); lastPos = i + 1; } } return b == null ? text : b.append(text, lastPos, text.length()).toString(); }
/**
 * Removes characters that are not allowed in XML 1.0.
 *
 * <p>The input is scanned by Unicode code point, so supplementary characters encoded as a
 * surrogate pair (which are legal in XML 1.0) are kept; unpaired surrogates and other
 * illegal characters are dropped. XML whitespace is a subset of the legal XML characters, so
 * no separate whitespace check is needed.
 *
 * @param str the string to clean; must not be {@code null}
 * @return the string with all illegal characters removed
 */
public static String removeInvalidChars(String str) {
    final int length = str.length();
    StringBuilder result = new StringBuilder(length);
    int i = 0;
    while (i < length) {
        final int codePoint = str.codePointAt(i);
        if (Verifier.isXMLCharacter(codePoint)) {
            result.appendCodePoint(codePoint);
        }
        i += Character.charCount(codePoint);
    }
    return result.toString();
}
public boolean shouldEscape(char ch) { if (bits == 16) { if (Verifier.isHighSurrogate(ch)) return true; // Safer this way per http://unicode.org/faq/utf_bom.html#utf8-4 else return false; } if (bits == 8) { if ((int) ch > 255) return true; else return false; } if (bits == 7) { if ((int) ch > 127) return true; else return false; } else { if (Verifier.isHighSurrogate(ch)) return true; // Safer this way per http://unicode.org/faq/utf_bom.html#utf8-4 if (canEncode != null && encoder != null) { try { Boolean val = (Boolean) canEncode.invoke(encoder, new Object[]{new Character(ch)}); return !val.booleanValue(); } catch (Exception ignored) { } } // Return false if we don't know. This risks not escaping // things which should be escaped, but also means people won't // start getting loads of unnecessary escapes. return false; } }
public static boolean isIdentifier(String name) { //return isTokenOfType(manager, name, RncTokenTypes.IDENTIFIER_OR_KEYWORD); if (name == null) { return false; } return Verifier.checkXMLName(name) == null || name.length() >= 2 && name.charAt(0) == '\\' && Verifier.checkXMLName(name.substring(1)) == null; }
public static String getValidXMLString(String source){ StringBuilder ret = new StringBuilder(); for (int i = 0, len = source.length(); i < len; i++) { // skip non valid XML characters if (Verifier.isXMLCharacter(source.charAt(i))) { ret.append(source.charAt(i)); } } return ret.toString(); }
/**
 * Splits the part of {@code text} covered by {@code range} at every match of
 * {@code SPLIT_PATTERN} and forwards the resulting pieces to the shared {@code TextSplitter}.
 *
 * <p>Nothing is emitted when {@code text} is null or empty, or when
 * {@code Verifier.checkCharacterData} reports a problem for the covered substring. Pieces
 * containing {@code "@"} or {@code "://"} are first passed through {@code excludeByPattern}
 * with the {@code MAIL} or {@code URL} pattern respectively.
 *
 * @param text the full text; may be {@code null}
 * @param range the region of {@code text} to process
 * @param consumer receives the ranges produced by the delegate splitter
 */
@Override
public void split(@Nullable String text, @NotNull TextRange range, Consumer<TextRange> consumer) {
  if (text == null || StringUtil.isEmpty(text)) {
    return;
  }
  String substring = range.substring(text);
  // Bail out if the covered text is not valid XML character data.
  if (Verifier.checkCharacterData(substring) != null) {
    return;
  }

  // Disabled check that returned early for text containing CJK characters:
  //for(int i = 0; i < text.length(); ++i) {
  //  final char ch = text.charAt(i);
  //  if (ch >= '\u3040' && ch <= '\u309f' || // Hiragana
  //      ch >= '\u30A0' && ch <= '\u30ff' || // Katakana
  //      ch >= '\u4E00' && ch <= '\u9FFF' || // CJK Unified ideographs
  //      ch >= '\uF900' && ch <= '\uFAFF' || // CJK Compatibility Ideographs
  //      ch >= '\uFF00' && ch <= '\uFFEF' //Halfwidth and Fullwidth Forms of Katakana & Fullwidth ASCII variants
  //  ) {
  //    return;
  //  }
  //}

  final TextSplitter ws = TextSplitter.getInstance();
  // Start offset of the piece currently being collected.
  int from = range.getStartOffset();
  // Start offset of the separator match that ends the current piece.
  int till;
  Matcher matcher = SPLIT_PATTERN.matcher(range.substring(text));
  while (true) {
    checkCancelled();
    List<TextRange> toCheck;
    TextRange wRange;
    String word;
    if(matcher.find()) {
      TextRange found = matcherRange(range, matcher);
      till = found.getStartOffset();
      // NOTE(review): on a bad size the loop moves on to the next match without
      // advancing 'from' and without reaching the hitEnd() check below - confirm intended.
      if (badSize(from, till)) {
        continue;
      }
      wRange = new TextRange(from, till);
      word = wRange.substring(text);
      // The next piece starts right after this separator match.
      from = found.getEndOffset();
    }
    else {
      // end hit or zero matches
      wRange = new TextRange(from, range.getEndOffset());
      word = wRange.substring(text);
    }
    if (word.contains("@")) {
      toCheck = excludeByPattern(text, wRange, MAIL, 0);
    }
    else if (word.contains("://")) {
      toCheck = excludeByPattern(text, wRange, URL, 0);
    }
    else {
      toCheck = Collections.singletonList(wRange);
    }
    for (TextRange r : toCheck) {
      ws.split(text, r, consumer);
    }
    // Stop once the matcher has consumed all of its input.
    if(matcher.hitEnd()) break;
  }
}