/**
 * Parses the given string with {@code Classifier.parse} and returns the resulting
 * key/value pairs as a single {@code map<varchar,varchar>} entry.
 *
 * @param mapType map type used to read the finished entry back out of the block builder
 * @param slice the string to classify
 * @return the map entry that was just written
 */
@ScalarFunction("parse_agent")
@Description("Returns Map, which has keys such as 'category', 'name', 'os', 'version', 'vendor' and 'os_version'")
@SqlType("map<varchar,varchar>")
public Block parseAgent(@TypeParameter("map<varchar,varchar>") Type mapType, @SqlType(StandardTypes.VARCHAR) Slice slice)
{
    Map<String, String> attributes = Classifier.parse(slice.toStringUtf8());

    // Reuse the instance-level page builder; start over once it reports being full.
    if (pageBuilder.isFull()) {
        pageBuilder.reset();
    }

    BlockBuilder mapBlockBuilder = pageBuilder.getBlockBuilder(0);
    BlockBuilder entryWriter = mapBlockBuilder.beginBlockEntry();
    // Each attribute is written as a key slice immediately followed by its value slice.
    attributes.forEach((key, value) -> {
        VARCHAR.writeSlice(entryWriter, Slices.utf8Slice(key));
        VARCHAR.writeSlice(entryWriter, Slices.utf8Slice(value));
    });
    mapBlockBuilder.closeEntry();
    pageBuilder.declarePosition();

    // The entry just closed is the last position in the builder.
    int lastPosition = mapBlockBuilder.getPositionCount() - 1;
    return (Block) mapType.getObject(mapBlockBuilder, lastPosition);
}
/**
 * Decimal overload of the geohash encoder: converts both decimal coordinates to
 * {@code double} and delegates to {@code GeohashEncode.geohash_encode}.
 *
 * @param latParameter decimal type of {@code lat}; supplies the scale
 * @param lngParameter decimal type of {@code lng}; supplies the scale
 * @param lat encoded unscaled latitude value
 * @param lng encoded unscaled longitude value
 * @param precision precision argument forwarded unchanged to the encoder
 * @return whatever {@code GeohashEncode.geohash_encode} returns (may be null)
 */
@SqlType(StandardTypes.VARCHAR)
@TypeParameterContainer({@TypeParameter("decimal(lat_precision, lat_scale)"), @TypeParameter("decimal(lng_precision, lng_scale)")})
@Nullable
public static Slice geohash_encode_dec(
        @TypeParameter("decimal(lat_precision, lat_scale)") DecimalType latParameter,
        @TypeParameter("decimal(lng_precision, lng_scale)") DecimalType lngParameter,
        @SqlType("decimal(lat_precision, lat_scale)") Slice lat,
        @SqlType("decimal(lng_precision, lng_scale)") Slice lng,
        @SqlType(StandardTypes.INTEGER) long precision)
{
    // Rebuild each decimal from its unscaled value and the scale declared by its type.
    double latitude = new BigDecimal(Decimals.decodeUnscaledValue(lat), latParameter.getScale()).doubleValue();
    double longitude = new BigDecimal(Decimals.decodeUnscaledValue(lng), lngParameter.getScale()).doubleValue();
    return GeohashEncode.geohash_encode(latitude, longitude, precision);
}
/**
 * Checks {@code min_by(double, double)} on an all-null input and on a small mixed input.
 */
@Test
public void testMinDoubleDouble()
        throws Exception
{
    Signature minBySignature = new Signature("min_by", AGGREGATE, StandardTypes.DOUBLE, StandardTypes.DOUBLE, StandardTypes.DOUBLE);
    InternalAggregationFunction minBy = METADATA.getFunctionRegistry().getAggregateFunctionImplementation(minBySignature);

    // All inputs null: expected result is null.
    assertAggregation(
            minBy,
            1.0,
            null,
            createDoublesBlock(null, null),
            createDoublesBlock(null, null));

    // The smallest key (1.0) is paired with value 3.0.
    assertAggregation(
            minBy,
            1.0,
            3.0,
            createDoublesBlock(3.0, 2.0, 5.0, 3.0),
            createDoublesBlock(1.0, 1.5, 2.0, 4.0));
}
/**
 * Accumulates one (truth, prediction) pair into the classifier-evaluation state.
 *
 * <p>When the two labels are equal, the true-positive count for that label is incremented.
 * Otherwise the false-positive count for the predicted label and the false-negative count
 * for the true label are both incremented. The first time a label is added to any of the
 * three maps, the state's memory usage grows by the label's slice length plus
 * {@code SIZE_OF_INT}.
 *
 * @param state aggregation state holding the three count maps
 * @param truth the actual label
 * @param prediction the predicted label
 */
@InputFunction
public static void input(EvaluateClassifierPredictionsState state, @SqlType(StandardTypes.VARCHAR) Slice truth, @SqlType(StandardTypes.VARCHAR) Slice prediction)
{
    if (truth.equals(prediction)) {
        String label = truth.toStringUtf8();
        boolean firstTruePositive = !state.getTruePositives().containsKey(label);
        if (firstTruePositive) {
            state.addMemoryUsage(truth.length() + SIZE_OF_INT);
        }
        int truePositiveCount = state.getTruePositives().getOrDefault(label, 0) + 1;
        state.getTruePositives().put(label, truePositiveCount);
        return;
    }

    // Mismatch: the prediction is a false positive for its label and the truth is a
    // false negative for its label.
    String actualLabel = truth.toStringUtf8();
    String predictedLabel = prediction.toStringUtf8();

    boolean firstFalsePositive = !state.getFalsePositives().containsKey(predictedLabel);
    if (firstFalsePositive) {
        state.addMemoryUsage(prediction.length() + SIZE_OF_INT);
    }
    int falsePositiveCount = state.getFalsePositives().getOrDefault(predictedLabel, 0) + 1;
    state.getFalsePositives().put(predictedLabel, falsePositiveCount);

    boolean firstFalseNegative = !state.getFalseNegatives().containsKey(actualLabel);
    if (firstFalseNegative) {
        state.addMemoryUsage(truth.length() + SIZE_OF_INT);
    }
    int falseNegativeCount = state.getFalseNegatives().getOrDefault(actualLabel, 0) + 1;
    state.getFalseNegatives().put(actualLabel, falseNegativeCount);
}
/**
 * Checks {@code max_by(varchar, double)} on two inputs that include null rows.
 */
@Test
public void testMaxDoubleVarchar()
{
    Signature maxBySignature = new Signature("max_by", AGGREGATE, StandardTypes.VARCHAR, StandardTypes.VARCHAR, StandardTypes.DOUBLE);
    InternalAggregationFunction maxBy = METADATA.getFunctionRegistry().getAggregateFunctionImplementation(maxBySignature);

    // Largest key is 2.0, paired with "a".
    assertAggregation(
            maxBy,
            1.0,
            "a",
            createStringsBlock("z", "a", null),
            createDoublesBlock(1.0, 2.0, null));

    // Largest key is 1.0, paired with "hi".
    assertAggregation(
            maxBy,
            1.0,
            "hi",
            createStringsBlock("zz", "hi", null, "a"),
            createDoublesBlock(0.0, 1.0, null, -1.0));
}
@Nullable @Description("returns regex group of extracted string with a pattern") @ScalarFunction @SqlType(StandardTypes.VARCHAR) public static Slice regexpExtract(@SqlType(StandardTypes.VARCHAR) Slice source, @SqlType(RegexpType.NAME) Regex pattern, @SqlType(StandardTypes.BIGINT) long groupIndex) { Matcher matcher = pattern.matcher(source.getBytes()); validateGroup(groupIndex, matcher.getEagerRegion()); int group = Ints.checkedCast(groupIndex); int offset = matcher.search(0, source.length(), Option.DEFAULT); if (offset == -1) { return null; } Region region = matcher.getEagerRegion(); int beg = region.beg[group]; int end = region.end[group]; if (beg == -1) { // end == -1 must be true return null; } Slice slice = source.slice(beg, end - beg); return slice; }
/**
 * Checks the aggregation named by {@code NAME} when keys repeat (first case) and when
 * values repeat (second case).
 */
@Test
public void testDuplicateKeysValues()
        throws Exception
{
    // Repeated key 1.0: the expected map holds a single entry with value "a".
    MapType doubleToVarchar = new MapType(DOUBLE, VARCHAR);
    Signature varcharValueSignature = new Signature(NAME, AGGREGATE, doubleToVarchar.getTypeSignature().toString(), StandardTypes.DOUBLE, StandardTypes.VARCHAR);
    InternalAggregationFunction varcharValueAgg = metadata.getFunctionRegistry().getAggregateFunctionImplementation(varcharValueSignature);
    assertAggregation(
            varcharValueAgg,
            1.0,
            ImmutableMap.of(1.0, "a"),
            createDoublesBlock(1.0, 1.0, 1.0),
            createStringsBlock("a", "b", "c"));

    // Distinct keys sharing the value 99: every key is expected in the result.
    MapType doubleToBigint = new MapType(DOUBLE, BIGINT);
    Signature bigintValueSignature = new Signature(NAME, AGGREGATE, doubleToBigint.getTypeSignature().toString(), StandardTypes.DOUBLE, StandardTypes.BIGINT);
    InternalAggregationFunction bigintValueAgg = metadata.getFunctionRegistry().getAggregateFunctionImplementation(bigintValueSignature);
    assertAggregation(
            bigintValueAgg,
            1.0,
            ImmutableMap.of(1.0, 99L, 2.0, 99L, 3.0, 99L),
            createDoublesBlock(1.0, 2.0, 3.0),
            createLongsBlock(99L, 99L, 99L));
}
@SqlType(StandardTypes.BOOLEAN) @Nullable @SqlNullable public static Boolean bloomFilterPersist(@SqlNullable @SqlType(BloomFilterType.TYPE) Slice bloomFilterSlice, @SqlType(StandardTypes.VARCHAR) Slice urlSlice) throws Exception { // Nothing todo if (urlSlice == null) { return true; } BloomFilter bf = getOrLoadBloomFilter(bloomFilterSlice); // Persist // we do not try catch here to make sure that errors are communicated clearly to the client // and typical retry logic continues to work String url = new String(urlSlice.getBytes()); if (!HTTP_CLIENT.isStarted()) { log.warn("Http client was not started, trying to start"); HTTP_CLIENT.start(); } Request post = HTTP_CLIENT.POST(url); post.content(new StringContentProvider(new String(bf.toBase64()))); post.method("PUT"); post.send(); log.info("Persisted " + bf.toString() + " " + url); return true; }
/**
 * Tells whether the category that {@code Classifier.parse} reports for the input equals
 * {@code DataSet.DATASET_CATEGORY_PC}.
 *
 * @param slice the string to classify; declared nullable
 * @return {@code true} when the category matches; {@code false} otherwise, including when
 *         the input is null or the parse result has no category entry
 */
@ScalarFunction("is_pc")
@Description("Returns Boolean: map['category'] is a pc or not.")
@SqlType(StandardTypes.BOOLEAN)
public static boolean isPC(@SqlNullable @SqlType(StandardTypes.VARCHAR) Slice slice)
{
    // The parameter is @SqlNullable, so a null argument must not be dereferenced.
    if (slice == null) {
        return false;
    }
    // Compare from the constant side so a missing category yields false instead of an NPE.
    return DataSet.DATASET_CATEGORY_PC.equals(Classifier.parse(slice.toStringUtf8()).get(DataSet.ATTRIBUTE_CATEGORY));
}
/**
 * Tells whether the category that {@code Classifier.parse} reports for the input equals
 * {@code DataSet.DATASET_CATEGORY_SMARTPHONE}.
 *
 * @param slice the string to classify; declared nullable
 * @return {@code true} when the category matches; {@code false} otherwise, including when
 *         the input is null or the parse result has no category entry
 */
@ScalarFunction("is_smartphone")
@Description("Returns Boolean: map['category'] is a smartphone or not.")
@SqlType(StandardTypes.BOOLEAN)
public static boolean IsSmartPhone(@SqlNullable @SqlType(StandardTypes.VARCHAR) Slice slice)
{
    // The parameter is @SqlNullable, so a null argument must not be dereferenced.
    if (slice == null) {
        return false;
    }
    // Compare from the constant side so a missing category yields false instead of an NPE.
    return DataSet.DATASET_CATEGORY_SMARTPHONE.equals(Classifier.parse(slice.toStringUtf8()).get(DataSet.ATTRIBUTE_CATEGORY));
}
/**
 * Tells whether the category that {@code Classifier.parse} reports for the input equals
 * {@code DataSet.DATASET_CATEGORY_MOBILEPHONE}.
 *
 * @param slice the string to classify; declared nullable
 * @return {@code true} when the category matches; {@code false} otherwise, including when
 *         the input is null or the parse result has no category entry
 */
@ScalarFunction("is_mobilephone")
@Description("Returns Boolean: map['category'] is a mobilephone or not.")
@SqlType(StandardTypes.BOOLEAN)
public static boolean IsMobilePhone(@SqlNullable @SqlType(StandardTypes.VARCHAR) Slice slice)
{
    // The parameter is @SqlNullable, so a null argument must not be dereferenced.
    if (slice == null) {
        return false;
    }
    // Compare from the constant side so a missing category yields false instead of an NPE.
    return DataSet.DATASET_CATEGORY_MOBILEPHONE.equals(Classifier.parse(slice.toStringUtf8()).get(DataSet.ATTRIBUTE_CATEGORY));
}
/**
 * Tells whether the category that {@code Classifier.parse} reports for the input equals
 * {@code DataSet.DATASET_CATEGORY_APPLIANCE}.
 *
 * @param slice the string to classify; declared nullable
 * @return {@code true} when the category matches; {@code false} otherwise, including when
 *         the input is null or the parse result has no category entry
 */
@ScalarFunction("is_appliance")
@Description("Returns Boolean: map['category'] is a appliance or not.")
@SqlType(StandardTypes.BOOLEAN)
public static boolean IsAppliance(@SqlNullable @SqlType(StandardTypes.VARCHAR) Slice slice)
{
    // The parameter is @SqlNullable, so a null argument must not be dereferenced.
    if (slice == null) {
        return false;
    }
    // Compare from the constant side so a missing category yields false instead of an NPE.
    return DataSet.DATASET_CATEGORY_APPLIANCE.equals(Classifier.parse(slice.toStringUtf8()).get(DataSet.ATTRIBUTE_CATEGORY));
}
/**
 * Tells whether the category that {@code Classifier.parse} reports for the input equals
 * {@code DataSet.DATASET_CATEGORY_CRAWLER}.
 *
 * @param slice the string to classify; declared nullable
 * @return {@code true} when the category matches; {@code false} otherwise, including when
 *         the input is null or the parse result has no category entry
 */
@ScalarFunction("is_crawler")
@Description("Returns Boolean: map['category'] is a crawler or not.")
@SqlType(StandardTypes.BOOLEAN)
public static boolean IsCrawler(@SqlNullable @SqlType(StandardTypes.VARCHAR) Slice slice)
{
    // The parameter is @SqlNullable, so a null argument must not be dereferenced.
    if (slice == null) {
        return false;
    }
    // Compare from the constant side so a missing category yields false instead of an NPE.
    return DataSet.DATASET_CATEGORY_CRAWLER.equals(Classifier.parse(slice.toStringUtf8()).get(DataSet.ATTRIBUTE_CATEGORY));
}
/**
 * Tells whether the category that {@code Classifier.parse} reports for the input equals
 * {@code DataSet.DATASET_CATEGORY_MISC}.
 *
 * @param slice the string to classify; declared nullable
 * @return {@code true} when the category matches; {@code false} otherwise, including when
 *         the input is null or the parse result has no category entry
 */
@ScalarFunction("is_misc")
@Description("Returns Boolean: map['category'] is a misc or not.")
@SqlType(StandardTypes.BOOLEAN)
public static boolean IsMisc(@SqlNullable @SqlType(StandardTypes.VARCHAR) Slice slice)
{
    // The parameter is @SqlNullable, so a null argument must not be dereferenced.
    if (slice == null) {
        return false;
    }
    // Compare from the constant side so a missing category yields false instead of an NPE.
    return DataSet.DATASET_CATEGORY_MISC.equals(Classifier.parse(slice.toStringUtf8()).get(DataSet.ATTRIBUTE_CATEGORY));
}
/**
 * Tells whether the category that {@code Classifier.parse} reports for the input equals
 * {@code DataSet.VALUE_UNKNOWN}.
 *
 * @param slice the string to classify; declared nullable
 * @return {@code true} when the category matches; {@code false} otherwise, including when
 *         the input is null or the parse result has no category entry
 */
@ScalarFunction("is_unknown")
@Description("Returns Boolean: map['category'] is a unknown or not.")
@SqlType(StandardTypes.BOOLEAN)
public static boolean IsUnknown(@SqlNullable @SqlType(StandardTypes.VARCHAR) Slice slice)
{
    // The parameter is @SqlNullable, so a null argument must not be dereferenced.
    if (slice == null) {
        return false;
    }
    // Compare from the constant side so a missing category yields false instead of an NPE.
    return DataSet.VALUE_UNKNOWN.equals(Classifier.parse(slice.toStringUtf8()).get(DataSet.ATTRIBUTE_CATEGORY));
}
/**
 * Converts {@code num} using the {@code fromWei} method of the {@code EthereumUnit}
 * whose name matches {@code unit}.
 *
 * @param num the amount to convert
 * @param unit unit name; upper-cased before lookup, so matching is case-insensitive
 * @return the result of {@code EthereumUnit.fromWei(num)} for the resolved unit
 */
@ScalarFunction("fromWei")
@Description("fromWei")
@SqlType(StandardTypes.DOUBLE)
public static double fromWei(@SqlType(StandardTypes.DOUBLE) double num, @SqlType(StandardTypes.VARCHAR) Slice unit)
{
    // Upper-case with a fixed locale: the default-locale variant maps 'i' to a dotted
    // capital under e.g. Turkish, which would make the lookup of valid names fail.
    String unitStr = unit.toStringUtf8().toUpperCase(java.util.Locale.ROOT);
    // NOTE(review): valueOf presumably throws IllegalArgumentException for unknown names
    // (standard enum behavior) - confirm EthereumUnit is an enum.
    EthereumUnit u = EthereumUnit.valueOf(unitStr);
    return u.fromWei(num);
}
/**
 * Converts {@code num} using the {@code toWei} method of the {@code EthereumUnit}
 * whose name matches {@code unit}.
 *
 * @param num the amount to convert
 * @param unit unit name; upper-cased before lookup, so matching is case-insensitive
 * @return the result of {@code EthereumUnit.toWei(num)} for the resolved unit
 */
@ScalarFunction("toWei")
@Description("toWei")
@SqlType(StandardTypes.DOUBLE)
public static double toWei(@SqlType(StandardTypes.DOUBLE) double num, @SqlType(StandardTypes.VARCHAR) Slice unit)
{
    // Upper-case with a fixed locale: the default-locale variant maps 'i' to a dotted
    // capital under e.g. Turkish, which would make the lookup of valid names fail.
    String unitStr = unit.toStringUtf8().toUpperCase(java.util.Locale.ROOT);
    // NOTE(review): valueOf presumably throws IllegalArgumentException for unknown names
    // (standard enum behavior) - confirm EthereumUnit is an enum.
    EthereumUnit u = EthereumUnit.valueOf(unitStr);
    return u.toWei(num);
}
/**
 * Deserializes a HyperLogLog from the given slice and returns its approximate size estimate.
 *
 * @param hll serialized HyperLogLog
 * @return the {@code estimate()} of the sketch's {@code approximateSize()}, as a long
 */
@Description("Returns the approximate cardinality of a HLL")
@ScalarFunction("cardinality")
@SqlType(StandardTypes.BIGINT)
public static long hllCardinality(@SqlType(HyperLogLogType.TYPE) Slice hll)
{
    byte[] serialized = hll.getBytes();
    return (Long) HyperLogLog.fromBytes(serialized).approximateSize().estimate();
}
/**
 * Builds a dense HyperLogLog containing the bytes of a single string and serializes it.
 *
 * @param string the value to add to the new sketch
 * @param bits passed (narrowed to int) to the {@code HyperLogLogMonoid} constructor
 * @return the serialized sketch wrapped in a slice
 */
@Description("Create a HLL from a string")
@ScalarFunction("hll_create")
@SqlType(HyperLogLogType.TYPE)
public static Slice hllCreate(@SqlType(StandardTypes.VARCHAR) Slice string, @SqlType(StandardTypes.BIGINT) long bits)
{
    int monoidBits = (int) bits;
    HyperLogLogMonoid hllMonoid = new HyperLogLogMonoid(monoidBits);
    byte[] element = string.getBytes();
    DenseHLL dense = hllMonoid.create(element).toDenseHLL();
    byte[] serialized = HyperLogLog.toBytes(dense);
    return Slices.wrappedBuffer(serialized);
}
/**
 * Returns the bytes of the input string rearranged in a pseudo-random order.
 *
 * <p>The random generator is seeded with the first character of the string, so the same
 * input always produces the same output.
 *
 * <p>NOTE(review): the rearrangement works on UTF-8 bytes, not characters, so input with
 * multi-byte characters can presumably yield byte sequences that are not valid UTF-8.
 *
 * @param string the string to shuffle
 * @return the shuffled bytes; an empty input is returned unchanged
 */
@SqlType(StandardTypes.VARCHAR)
public static Slice shuffle_string(@SqlType(StandardTypes.VARCHAR) Slice string)
{
    String id = string.toStringUtf8();
    // An empty string has no first character to seed with and nothing to shuffle.
    if (id.isEmpty()) {
        return string;
    }
    Random rnd = new Random(id.charAt(0));
    // Encode explicitly as UTF-8 (the charset the input was decoded with) rather than
    // the JVM's platform default.
    byte[] bytes = id.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    // Walk from the end; each position trades places with a random index at or below it.
    for (int i = bytes.length; i > 1; i--) {
        swap(bytes, i - 1, rnd.nextInt(i));
    }
    return Slices.wrappedBuffer(bytes);
}
/**
 * Tests whether a point lies inside the polygon described by a flat coordinate array.
 *
 * <p>The array is read as consecutive coordinate pairs: the first pair starts the path and
 * every following pair adds a line segment. Null elements are treated as {@code 0.0}.
 *
 * @param elementType element type used to read doubles from the block
 * @param arrayBlock flat array of polygon coordinates
 * @param lng first coordinate of the point to test
 * @param lat second coordinate of the point to test
 * @return the result of {@code OperatorContains}, or null when the array holds fewer than
 *         two values and therefore cannot start a path
 */
@TypeParameter(StandardTypes.DOUBLE)
@SqlType(StandardTypes.BOOLEAN)
@Nullable
public static Boolean contains(
        @TypeParameter(StandardTypes.DOUBLE) Type elementType,
        @SqlType("array(double)") Block arrayBlock,
        @SqlType(StandardTypes.DOUBLE) double lng,
        @SqlType(StandardTypes.DOUBLE) double lat)
{
    int count = arrayBlock.getPositionCount();
    // Without at least one full pair there is no start point; avoid indexing past the array.
    if (count < 2) {
        return null;
    }

    double[] array = new double[count];
    for (int i = 0; i < count; i++) {
        if (arrayBlock.isNull(i)) {
            // Null entries keep the array default of 0.0.
            continue;
        }
        array[i] = elementType.getDouble(arrayBlock, i);
    }

    Polygon poly = new Polygon();
    poly.startPath(array[0], array[1]);
    // Require a complete pair on every step so a trailing unpaired value is ignored
    // instead of causing an out-of-bounds read.
    for (int i = 2; i + 1 < array.length; i += 2) {
        poly.lineTo(array[i], array[i + 1]);
    }
    return OperatorContains.local().execute(poly, new Point(lng, lat), null, null);
}
/**
 * Computes the haversine distance between two points given in degrees, scaled by a
 * sphere radius of {@code 6371000.0}.
 *
 * @param lat1 latitude of the first point, in degrees
 * @param lng1 longitude of the first point, in degrees
 * @param lat2 latitude of the second point, in degrees
 * @param lng2 longitude of the second point, in degrees
 * @return the central angle between the points multiplied by {@code 6371000.0}
 */
@SqlType(StandardTypes.DOUBLE)
public static double haversine(
        @SqlType(StandardTypes.DOUBLE) double lat1,
        @SqlType(StandardTypes.DOUBLE) double lng1,
        @SqlType(StandardTypes.DOUBLE) double lat2,
        @SqlType(StandardTypes.DOUBLE) double lng2)
{
    double deltaLatRad = Math.toRadians(lat2 - lat1);
    double deltaLngRad = Math.toRadians(lng2 - lng1);

    double sinHalfLat = Math.sin(deltaLatRad / 2.0D);
    double sinHalfLng = Math.sin(deltaLngRad / 2.0D);
    double cosLat1 = Math.cos(Math.toRadians(lat1));
    double cosLat2 = Math.cos(Math.toRadians(lat2));

    // Same left-to-right multiplication order as the single-expression form.
    double haversineTerm = sinHalfLat * sinHalfLat + cosLat1 * cosLat2 * sinHalfLng * sinHalfLng;
    double centralAngle = 2.0D * Math.atan2(Math.sqrt(haversineTerm), Math.sqrt(1.0D - haversineTerm));
    return 6371000.0D * centralAngle;
}
/**
 * Decodes a geohash string into a two-element double array: latitude first, then longitude.
 *
 * @param geohash the geohash to decode
 * @return a block holding {@code [lat, lon]} as returned by {@code GeoHash.decodeHash}
 */
@SqlType("array(double)")
public static Block geohash_decode(@SqlType(StandardTypes.VARCHAR) Slice geohash)
{
    BlockBuilder latLonBuilder = DOUBLE.createBlockBuilder(new BlockBuilderStatus(), 2);
    String hash = geohash.toStringUtf8();
    LatLong decoded = GeoHash.decodeHash(hash);
    DOUBLE.writeDouble(latLonBuilder, decoded.getLat());
    DOUBLE.writeDouble(latLonBuilder, decoded.getLon());
    return latLonBuilder.build();
}
/**
 * Parses a date string with the supplied pattern and renders it as
 * {@code <weekYear>-W<two-digit week>-1}.
 *
 * <p>Weeks start on Monday and the first week of a year must contain at least four days.
 *
 * @param date the date text to parse
 * @param dateFormat a {@code SimpleDateFormat} pattern describing {@code date}
 * @return the formatted week string
 * @throws ParseException when {@code date} does not match {@code dateFormat}
 */
@SqlType(StandardTypes.VARCHAR)
public static Slice to_cuebiq_week_format(@SqlType(StandardTypes.DATE) Slice date, @SqlType(StandardTypes.VARCHAR) Slice dateFormat)
        throws ParseException
{
    SimpleDateFormat parser = new SimpleDateFormat(dateFormat.toStringUtf8());

    Calendar weekCalendar = Calendar.getInstance();
    weekCalendar.setMinimalDaysInFirstWeek(4);
    weekCalendar.setFirstDayOfWeek(Calendar.MONDAY);
    weekCalendar.setTime(parser.parse(date.toStringUtf8()));

    int weekOfYear = weekCalendar.get(Calendar.WEEK_OF_YEAR);
    int weekYear = weekCalendar.getWeekYear();

    StringBuilder formatted = new StringBuilder();
    formatted.append(weekYear).append("-W");
    // Single-digit weeks are left-padded with one zero.
    if (weekOfYear < 10) {
        formatted.append("0");
    }
    formatted.append(weekOfYear).append("-1");
    return Slices.utf8Slice(formatted.toString());
}
/**
 * Extracts the minute-of-hour field from a packed timestamp-with-time-zone value, using
 * the chronology unpacked from that same value.
 *
 * @param timestampWithTimeZone packed timestamp with time zone
 * @return the minute of the hour
 */
@Description("minute of the hour of the given timestamp")
@ScalarFunction("minute")
@SqlType(StandardTypes.BIGINT)
public static long minuteFromTimestampWithTimeZone(@SqlType(StandardTypes.TIMESTAMP_WITH_TIME_ZONE) long timestampWithTimeZone)
{
    long millisUtc = unpackMillisUtc(timestampWithTimeZone);
    return unpackChronology(timestampWithTimeZone)
            .minuteOfHour()
            .get(millisUtc);
}
/**
 * Checks that {@code arbitrary(boolean)} over two {@code true} rows yields {@code true}.
 */
@Test
public void testValidBoolean()
        throws Exception
{
    Signature arbitraryBoolean = new Signature("arbitrary", AGGREGATE, StandardTypes.BOOLEAN, StandardTypes.BOOLEAN);
    InternalAggregationFunction aggregation = metadata.getFunctionRegistry().getAggregateFunctionImplementation(arbitraryBoolean);
    assertAggregation(
            aggregation,
            1.0,
            true,
            createBooleansBlock(true, true));
}
/**
 * Casts a varchar value to double using {@link Double#parseDouble(String)}.
 *
 * @param slice the text to parse
 * @return the parsed double
 * @throws PrestoException with {@code INVALID_CAST_ARGUMENT} when the text is not a valid
 *         double; the underlying {@link NumberFormatException} is attached as the cause
 */
@ScalarOperator(CAST)
@SqlType(StandardTypes.DOUBLE)
public static double castToDouble(@SqlType(StandardTypes.VARCHAR) Slice slice)
{
    // Decode once; the same text is used for parsing and for the error message.
    String text = slice.toStringUtf8();
    try {
        return Double.parseDouble(text);
    }
    catch (NumberFormatException e) {
        // Keep the original exception as the cause so the parse failure stays diagnosable.
        throw new PrestoException(INVALID_CAST_ARGUMENT, format("Can not cast '%s' to DOUBLE", text), e);
    }
}
/**
 * Returns the path component of a URL.
 *
 * @param url the URL text, parsed via {@code parseUrl}
 * @return the path as a slice, or null when {@code parseUrl} returns null
 */
@Nullable
@Description("extract part from url")
@ScalarFunction
@SqlType(StandardTypes.VARCHAR)
public static Slice urlExtractPath(@SqlType(StandardTypes.VARCHAR) Slice url)
{
    URI parsed = parseUrl(url);
    if (parsed == null) {
        return null;
    }
    return slice(parsed.getPath());
}
/**
 * Hash operator for timestamp-with-time-zone values.
 *
 * <p>Only the UTC millisecond part of the packed value contributes to the hash; it is
 * folded to 32 bits by XOR-ing its upper and lower halves.
 *
 * @param value packed timestamp with time zone
 * @return the 32-bit fold of the UTC millis, widened to long
 */
@ScalarOperator(HASH_CODE)
@SqlType(StandardTypes.BIGINT)
public static long hashCode(@SqlType(StandardTypes.TIMESTAMP_WITH_TIME_ZONE) long value)
{
    // Long.hashCode is defined as (int) (v ^ (v >>> 32)).
    return Long.hashCode(unpackMillisUtc(value));
}
/**
 * Reports whether every column has a supported type, i.e. none of them has a type
 * signature whose base is {@code ARRAY}, {@code MAP} or {@code ROW}.
 *
 * @param columns the columns to inspect
 * @return {@code true} when no column has an array, map or row base type
 */
private static boolean columnTypeSupported(List<HiveColumnHandle> columns)
{
    return columns.stream()
            .map(HiveColumnHandle::getTypeSignature)
            .map(TypeSignature::getBase)
            // Keep only the structural base types; any survivor means "unsupported".
            .filter(typeBase -> StandardTypes.ARRAY.equals(typeBase)
                    || StandardTypes.MAP.equals(typeBase)
                    || StandardTypes.ROW.equals(typeBase))
            .collect(toList())
            .isEmpty();
}
@Description("second of the minute of the given time") @ScalarFunction("second") @SqlType(StandardTypes.BIGINT) public static long secondFromTime(@SqlType(StandardTypes.TIME) long time) { // Time is effectively UTC so no need for a custom chronology return SECOND_OF_MINUTE.get(time); }
/**
 * Extracts the quarter-of-year field from a packed timestamp-with-time-zone value, using
 * the chronology unpacked from that same value.
 *
 * @param timestampWithTimeZone packed timestamp with time zone
 * @return the quarter of the year
 */
@Description("quarter of the year of the given timestamp")
@ScalarFunction("quarter")
@SqlType(StandardTypes.BIGINT)
public static long quarterFromTimestampWithTimeZone(@SqlType(StandardTypes.TIMESTAMP_WITH_TIME_ZONE) long timestampWithTimeZone)
{
    long millisUtc = unpackMillisUtc(timestampWithTimeZone);
    return QUARTER_OF_YEAR
            .getField(unpackChronology(timestampWithTimeZone))
            .get(millisUtc);
}
/**
 * Casts a varchar value to a date by trimming it and passing the text to {@code parseDate}.
 *
 * @param value the text to parse
 * @return the value produced by {@code parseDate}
 * @throws PrestoException with {@code INVALID_CAST_ARGUMENT} wrapping any
 *         {@link IllegalArgumentException} raised while trimming or parsing
 */
@ScalarOperator(CAST)
@SqlType(StandardTypes.DATE)
public static long castFromSlice(@SqlType(StandardTypes.VARCHAR) Slice value)
{
    try {
        String trimmedText = trim(value).toStringUtf8();
        return parseDate(trimmedText);
    }
    catch (IllegalArgumentException e) {
        throw new PrestoException(INVALID_CAST_ARGUMENT, e);
    }
}
/**
 * Checks {@code min_by(double, double)} when one of the value rows is null.
 */
@Test
public void testMinNull()
{
    Signature minBySignature = new Signature("min_by", AGGREGATE, StandardTypes.DOUBLE, StandardTypes.DOUBLE, StandardTypes.DOUBLE);
    InternalAggregationFunction minBy = METADATA.getFunctionRegistry().getAggregateFunctionImplementation(minBySignature);

    // Smallest key is 1.0, paired with the non-null value 1.0.
    assertAggregation(
            minBy,
            1.0,
            1.0,
            createDoublesBlock(1.0, null),
            createDoublesBlock(1.0, 2.0));
}
/**
 * Checks the three-argument {@code max_by(varchar, double, bigint)} form, which returns
 * an {@code array<varchar>}; every case passes 2 as the third argument.
 */
@Test
public void testMaxDoubleVarchar()
{
    Signature maxByNSignature = new Signature("max_by", AGGREGATE, "array<varchar>", StandardTypes.VARCHAR, StandardTypes.DOUBLE, StandardTypes.BIGINT);
    InternalAggregationFunction maxByN = METADATA.getFunctionRegistry().getAggregateFunctionImplementation(maxByNSignature);

    assertAggregation(
            maxByN,
            1.0,
            ImmutableList.of("a", "z"),
            createStringsBlock("z", "a", null),
            createDoublesBlock(1.0, 2.0, null),
            createRLEBlock(2L, 3));

    assertAggregation(
            maxByN,
            1.0,
            ImmutableList.of("bb", "hi"),
            createStringsBlock("zz", "hi", "bb", "a"),
            createDoublesBlock(0.0, 1.0, 2.0, -1.0),
            createRLEBlock(2L, 4));

    // Same keys as above but with a null row in place of the largest key.
    assertAggregation(
            maxByN,
            1.0,
            ImmutableList.of("hi", "zz"),
            createStringsBlock("zz", "hi", null, "a"),
            createDoublesBlock(0.0, 1.0, null, -1.0),
            createRLEBlock(2L, 4));
}
/**
 * Checks that {@code arbitrary(varchar)} over two {@code "a"} rows yields {@code "a"}.
 */
@Test
public void testValidString()
        throws Exception
{
    Signature arbitraryVarchar = new Signature("arbitrary", AGGREGATE, StandardTypes.VARCHAR, StandardTypes.VARCHAR);
    InternalAggregationFunction aggregation = metadata.getFunctionRegistry().getAggregateFunctionImplementation(arbitraryVarchar);
    assertAggregation(
            aggregation,
            1.0,
            "a",
            createStringsBlock("a", "a"));
}
/**
 * Converts an angle from degrees to radians via {@link Math#toRadians(double)}.
 *
 * @param degrees the angle in degrees
 * @return the same angle in radians
 */
@Description("converts an angle in degrees to radians")
@ScalarFunction
@SqlType(StandardTypes.DOUBLE)
public static double radians(@SqlType(StandardTypes.DOUBLE) double degrees)
{
    double angleInRadians = Math.toRadians(degrees);
    return angleInRadians;
}
/**
 * Returns the constant positive infinity.
 *
 * @return {@link Double#POSITIVE_INFINITY}
 */
@Description("Infinity")
@ScalarFunction
@SqlType(StandardTypes.DOUBLE)
public static double infinity()
{
    final double positiveInfinity = Double.POSITIVE_INFINITY;
    return positiveInfinity;
}
/**
 * Varchar-input variant of {@code json_array_get}; forwards both arguments unchanged to
 * {@code jsonArrayGet}.
 *
 * @param json the JSON text
 * @param index the array index to fetch
 * @return whatever {@code jsonArrayGet} returns (may be null)
 */
@Nullable
@ScalarFunction("json_array_get")
@SqlType(StandardTypes.JSON)
public static Slice varcharJsonArrayGet(@SqlType(StandardTypes.VARCHAR) Slice json, @SqlType(StandardTypes.BIGINT) long index)
{
    Slice element = jsonArrayGet(json, index);
    return element;
}
@ScalarOperator(OperatorType.CAST) @SqlType(RegexpType.NAME) public static Regex castToRegexp(@SqlType(StandardTypes.VARCHAR) Slice pattern) { Regex regex; try { // When normal UTF8 encoding instead of non-strict UTF8) is used, joni can infinite loop when invalid UTF8 slice is supplied to it. regex = new Regex(pattern.getBytes(), 0, pattern.length(), Option.DEFAULT, NonStrictUTF8Encoding.INSTANCE, Syntax.Java); } catch (Exception e) { throw new PrestoException(INVALID_FUNCTION_ARGUMENT, e); } return regex; }