private void findElements() { if (tq.matches("@")) { consumeAttribute(); } else if (tq.matches("*")) { allElements(); } else if (tq.matchesRegex("\\w+\\(.*\\).*")) { consumeOperatorFunction(); } else if (tq.matchesWord()) { byTag(); } else if (tq.matchesRegex("\\[\\d+\\]")) { byNth(); } else if (tq.matches("[")) { evals.add(consumePredicates(tq.chompBalanced('[', ']'))); } else { // unhandled throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, tq.remainder()); } }
/**
 * Parses a function-style predicate from the head of {@code predicatesQueue}.
 * <p>
 * Each key of {@code FUNCTION_MAPPING} is tried in the map's iteration order; the first key that
 * can be chomped from the queue selects the function. Its balanced {@code (...)} argument text is
 * then parsed into parameters and quote-trimmed.
 *
 * @param predicatesQueue queue positioned at the start of a function call
 * @return the evaluator produced by the mapped {@code FunctionEvaluator}, or {@code null} when the
 *         first argument does not start with {@code @}
 * @throws Selector.SelectorParseException if no key of {@code FUNCTION_MAPPING} matches
 */
private Evaluator byFunction(XTokenQueue predicatesQueue) {
    for (Map.Entry<String, FunctionEvaluator> mapping : FUNCTION_MAPPING.entrySet()) {
        if (!predicatesQueue.matchChomp(mapping.getKey())) {
            continue;
        }
        String rawArguments = predicatesQueue.chompBalanced('(', ')');
        List<String> arguments = XTokenQueue.trimQuotes(XTokenQueue.parseFuncionParams(rawArguments));

        // NOTE(review): assumes at least one argument was parsed - confirm parseFuncionParams never yields an empty list.
        String target = arguments.get(0);
        if (!target.startsWith("@")) {
            // NOTE(review): callers receive null here - confirm they handle it.
            return null;
        }
        // Drop the leading '@' before handing the arguments to the function.
        arguments.set(0, target.substring(1));
        return mapping.getValue().call(arguments.toArray(new String[0]));
    }
    throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, predicatesQueue.remainder());
}
/**
 * Configures {@code elementOperator} from a {@code regex(...)} function expression.
 * <p>
 * Supported argument lists, after parsing and quote-trimming:
 * <ul>
 * <li>one argument: used as the pattern</li>
 * <li>two arguments: {@code (@attr, pattern)} when the first starts with {@code @}, otherwise
 *     {@code (pattern, int)}</li>
 * <li>three arguments: {@code (attr, pattern, int)}, where the first character of the first
 *     argument is dropped</li>
 * </ul>
 *
 * @param remainder the full function text; checked to end with {@code )} and sliced from the
 *                  length of {@code "regex("} up to that closing bracket
 * @throws Selector.SelectorParseException if the number of arguments is not 1, 2 or 3
 */
private void functionRegex(String remainder) {
    Validate.isTrue(remainder.endsWith(")"), "Unclosed bracket for function! " + remainder);
    String argumentText = remainder.substring("regex(".length(), remainder.length() - 1);
    List<String> params = XTokenQueue.trimQuotes(XTokenQueue.parseFuncionParams(argumentText));

    switch (params.size()) {
        case 1:
            elementOperator = new ElementOperator.Regex(params.get(0));
            break;
        case 2: {
            String first = params.get(0);
            if (first.startsWith("@")) {
                elementOperator = new ElementOperator.Regex(params.get(1), first.substring(1));
            } else {
                elementOperator = new ElementOperator.Regex(first, null, Integer.parseInt(params.get(1)));
            }
            break;
        }
        case 3:
            // NOTE(review): the first character is stripped without checking that it is '@' - confirm intended.
            elementOperator = new ElementOperator.Regex(params.get(1), params.get(0).substring(1), Integer.parseInt(params.get(2)));
            break;
        default:
            throw new Selector.SelectorParseException("Unknown usage for regex()" + remainder);
    }
}
private void byAttribute() { TokenQueue cq = new TokenQueue(tq.chompBalanced('[', ']')); // content queue String key = cq.consumeToAny(AttributeEvals); // eq, not, start, end, contain, match, (no val) Validate.notEmpty(key); cq.consumeWhitespace(); if (cq.isEmpty()) { if (key.startsWith("^")) evals.add(new Evaluator.AttributeStarting(key.substring(1))); else evals.add(new Evaluator.Attribute(key)); } else { if (cq.matchChomp("=")) evals.add(new Evaluator.AttributeWithValue(key, cq.remainder())); else if (cq.matchChomp("!=")) evals.add(new Evaluator.AttributeWithValueNot(key, cq.remainder())); else if (cq.matchChomp("^=")) evals.add(new Evaluator.AttributeWithValueStarting(key, cq.remainder())); else if (cq.matchChomp("$=")) evals.add(new Evaluator.AttributeWithValueEnding(key, cq.remainder())); else if (cq.matchChomp("*=")) evals.add(new Evaluator.AttributeWithValueContaining(key, cq.remainder())); else if (cq.matchChomp("~=")) evals.add(new Evaluator.AttributeWithValueMatching(key, Pattern.compile(cq.remainder()))); else throw new Selector.SelectorParseException( "Could not parse attribute query '%s': unexpected token at '%s'", query, cq.remainder()); } }
/**
 * Parses the argument of an {@code :nth-*()} pseudo-selector and appends the corresponding
 * evaluator to {@code evals}.
 * <p>
 * The argument is read from {@code tq} up to the closing {@code )}, trimmed and lower-cased, then
 * interpreted as one of: {@code odd} (a=2, b=1), {@code even} (a=2, b=0), a form matched by
 * {@code NTH_AB} (step {@code a} and optional offset {@code b}), or a form matched by
 * {@code NTH_B} (a=0, offset only).
 *
 * @param backwards {@code true} for the {@code nth-last-*} variants
 * @param ofType    {@code true} for the {@code *-of-type} variants
 * @throws Selector.SelectorParseException if the argument matches none of the accepted forms
 */
private void cssNthChild(boolean backwards, boolean ofType) {
    String argS = tq.chompTo(")").trim().toLowerCase();
    Matcher mAB = NTH_AB.matcher(argS);
    Matcher mB = NTH_B.matcher(argS);
    final int a, b;
    if ("odd".equals(argS)) {
        a = 2;
        b = 1;
    } else if ("even".equals(argS)) {
        a = 2;
        b = 0;
    } else if (mAB.matches()) {
        if (mAB.group(3) != null) {
            // Digits present: group 1 is the signed coefficient text; parseInt rejects a leading '+'.
            a = Integer.parseInt(mAB.group(1).replaceFirst("^\\+", ""));
        } else {
            // No digits before 'n'. Previously this was always 1, which turned "-n+3" into "n+3".
            // Assumes group 1 then holds only the optional sign (as the digit branch implies);
            // any value other than a bare "-" keeps the former default of 1.
            a = "-".equals(mAB.group(1)) ? -1 : 1;
        }
        b = mAB.group(4) != null ? Integer.parseInt(mAB.group(4).replaceFirst("^\\+", "")) : 0;
    } else if (mB.matches()) {
        a = 0;
        b = Integer.parseInt(mB.group().replaceFirst("^\\+", ""));
    } else {
        throw new Selector.SelectorParseException("Could not parse nth-index '%s': unexpected format", argS);
    }

    if (ofType) {
        if (backwards) {
            evals.add(new Evaluator.IsNthLastOfType(a, b));
        } else {
            evals.add(new Evaluator.IsNthOfType(a, b));
        }
    } else {
        if (backwards) {
            evals.add(new Evaluator.IsNthLastChild(a, b));
        } else {
            evals.add(new Evaluator.IsNthChild(a, b));
        }
    }
}
private Evaluator consumePredicates(String queue) { XTokenQueue predicatesQueue = new XTokenQueue(queue); EvaluatorStack evaluatorStack = new EvaluatorStack(); Operation currentOperation = null; predicatesQueue.consumeWhitespace(); while (!predicatesQueue.isEmpty()) { if (predicatesQueue.matchChomp("and")) { currentOperation = Operation.AND; } else if (predicatesQueue.matchChomp("or")) { currentOperation = Operation.OR; } else { if (currentOperation == null && evaluatorStack.size() > 0) { throw new IllegalArgumentException(String.format("Need AND/OR between two predicate! %s", predicatesQueue.remainder())); } Evaluator evaluator; if (predicatesQueue.matches("(")) { evaluator = consumePredicates(predicatesQueue.chompBalanced('(', ')')); } else if (predicatesQueue.matches("@")) { evaluator = byAttribute(predicatesQueue); } else if (predicatesQueue.matchesRegex("\\w+.*")) { evaluator = byFunction(predicatesQueue); } else { throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, predicatesQueue.remainder()); } evaluatorStack.calc(evaluator, currentOperation); //consume operator currentOperation = null; } predicatesQueue.consumeWhitespace(); } evaluatorStack.mergeOr(); return evaluatorStack.peek(); }
private void findElements() { if (tq.matchChomp("#")) byId(); else if (tq.matchChomp(".")) byClass(); else if (tq.matchesWord()) byTag(); else if (tq.matches("[")) byAttribute(); else if (tq.matchChomp("*")) allElements(); else if (tq.matchChomp(":lt(")) indexLessThan(); else if (tq.matchChomp(":gt(")) indexGreaterThan(); else if (tq.matchChomp(":eq(")) indexEquals(); else if (tq.matches(":has(")) has(); else if (tq.matches(":contains(")) contains(false); else if (tq.matches(":containsOwn(")) contains(true); else if (tq.matches(":matches(")) matches(false); else if (tq.matches(":matchesOwn(")) matches(true); else if (tq.matches(":not(")) not(); else if (tq.matchChomp(":nth-child(")) cssNthChild(false, false); else if (tq.matchChomp(":nth-last-child(")) cssNthChild(true, false); else if (tq.matchChomp(":nth-of-type(")) cssNthChild(false, true); else if (tq.matchChomp(":nth-last-of-type(")) cssNthChild(true, true); else if (tq.matchChomp(":first-child")) evals.add(new Evaluator.IsFirstChild()); else if (tq.matchChomp(":last-child")) evals.add(new Evaluator.IsLastChild()); else if (tq.matchChomp(":first-of-type")) evals.add(new Evaluator.IsFirstOfType()); else if (tq.matchChomp(":last-of-type")) evals.add(new Evaluator.IsLastOfType()); else if (tq.matchChomp(":only-child")) evals.add(new Evaluator.IsOnlyChild()); else if (tq.matchChomp(":only-of-type")) evals.add(new Evaluator.IsOnlyOfType()); else if (tq.matchChomp(":empty")) evals.add(new Evaluator.IsEmpty()); else if (tq.matchChomp(":root")) evals.add(new Evaluator.IsRoot()); else // unhandled throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, tq.remainder()); }
private Evaluator byAttribute(XTokenQueue cq) { cq.matchChomp("@"); String key = cq.consumeToAny("=", "!=", "^=", "$=", "*=", "~="); // eq, not, start, end, contain, match, (no val) Validate.notEmpty(key); cq.consumeWhitespace(); Evaluator evaluator; if (cq.isEmpty()) { if ("*".equals(key)) { evaluator = new XEvaluators.HasAnyAttribute(); } else { evaluator = new Evaluator.Attribute(key); } } else { if (cq.matchChomp("=")) { String value = chompEqualValue(cq); //to support select one class out of all if (key.equals("class")) { String className = XTokenQueue.trimQuotes(value); if (!className.contains(" ")) { evaluator = new Evaluator.Class(className); } else { evaluator = new Evaluator.AttributeWithValue(key, className); } } else { evaluator = new Evaluator.AttributeWithValue(key, XTokenQueue.trimQuotes(value)); } } else if (cq.matchChomp("!=")) evaluator = new Evaluator.AttributeWithValueNot(key, XTokenQueue.trimQuotes(chompEqualValue(cq))); else if (cq.matchChomp("^=")) evaluator = new Evaluator.AttributeWithValueStarting(key, XTokenQueue.trimQuotes(chompEqualValue(cq))); else if (cq.matchChomp("$=")) evaluator = new Evaluator.AttributeWithValueEnding(key, XTokenQueue.trimQuotes(chompEqualValue(cq))); else if (cq.matchChomp("*=")) evaluator = new Evaluator.AttributeWithValueContaining(key, XTokenQueue.trimQuotes(chompEqualValue(cq))); else if (cq.matchChomp("~=")) evaluator = new Evaluator.AttributeWithValueMatching(key, Pattern.compile(XTokenQueue.trimQuotes(chompEqualValue(cq)))); else throw new Selector.SelectorParseException("Could not parse attribute query '%s': unexpected token at '%s'", query, chompEqualValue(cq)); } return evaluator; }
/**
 * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements
 * may include this element, or any of its children.
 * <p>
 * This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
 * multiple filters can be combined, e.g.:
 * </p>
 * <ul>
 * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)</li>
 * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)</li>
 * </ul>
 * <p>
 * See the query syntax documentation in {@link org.jsoup.select.Selector}.
 * </p>
 *
 * @param cssQuery a {@link Selector} CSS-like query
 * @return elements that match the query (empty if none match)
 * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
 * @see org.jsoup.select.Selector
 */
public Elements select(String cssQuery) {
    // Delegates to Selector.select, passing this element as the second argument.
    return Selector.select(cssQuery, this);
}
/**
 * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context.
 * <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query
 * execution stops on the first hit.</p>
 *
 * @param cssQuery a {@link Selector} CSS-like query
 * @return the first matching element, or <b>{@code null}</b> if there is no match.
 * @see org.jsoup.select.Selector
 */
public Element selectFirst(String cssQuery) {
    // Delegates to Selector.selectFirst, passing this element as the second argument.
    return Selector.selectFirst(cssQuery, this);
}
/**
 * Find elements that match the {@link Selector} query, with this element as the starting context. Matched elements
 * may include this element, or any of its children.
 * <p>
 * This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
 * multiple filters can be combined, e.g.:
 * </p>
 * <ul>
 * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)</li>
 * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)</li>
 * </ul>
 * <p>
 * See the query syntax documentation in {@link org.jsoup.select.Selector}.
 * </p>
 *
 * @param query a {@link Selector} query
 * @return elements that match the query (empty if none match)
 * @see org.jsoup.select.Selector
 */
public Elements select(String query) {
    // Delegates to Selector.select, passing this element as the second argument.
    return Selector.select(query, this);
}