trinodb / trino

Official repository of Trino, the distributed SQL query engine for big data, formerly known as PrestoSQL (https://trino.io)
https://trino.io
Apache License 2.0
10.15k stars 2.93k forks source link

Support Hive view containing custom UDFs #5689

Open qianmoQ opened 3 years ago

qianmoQ commented 3 years ago

I'm integrating Coral with Presto to enable querying Hive views, and I get the following error:

io.prestosql.spi.PrestoException: Failed to translate Hive view 'odv.app': Unknown function name: smid_udf
    at io.prestosql.plugin.hive.ViewReaderUtil$HiveViewReader.decodeViewData(ViewReaderUtil.java:157)
    at io.prestosql.plugin.hive.HiveMetadata.lambda$getView$60(HiveMetadata.java:1860)
    at java.base/java.util.Optional.map(Optional.java:265)
    at io.prestosql.plugin.hive.HiveMetadata.getView(HiveMetadata.java:1858)
    at io.prestosql.plugin.base.classloader.ClassLoaderSafeConnectorMetadata.getView(ClassLoaderSafeConnectorMetadata.java:505)
    at io.prestosql.metadata.MetadataManager.getView(MetadataManager.java:1069)
    at io.prestosql.sql.analyzer.StatementAnalyzer$Visitor.visitTable(StatementAnalyzer.java:1202)
    at io.prestosql.sql.analyzer.StatementAnalyzer$Visitor.visitTable(StatementAnalyzer.java:322)
    at io.prestosql.sql.tree.Table.accept(Table.java:53)
    at io.prestosql.sql.tree.AstVisitor.process(AstVisitor.java:27)
    at io.prestosql.sql.analyzer.StatementAnalyzer$Visitor.process(StatementAnalyzer.java:339)
    at io.prestosql.sql.analyzer.StatementAnalyzer$Visitor.analyzeFrom(StatementAnalyzer.java:2483)
    at io.prestosql.sql.analyzer.StatementAnalyzer$Visitor.visitQuerySpecification(StatementAnalyzer.java:1526)
    at io.prestosql.sql.analyzer.StatementAnalyzer$Visitor.visitQuerySpecification(StatementAnalyzer.java:322)
    at io.prestosql.sql.tree.QuerySpecification.accept(QuerySpecification.java:144)
    at io.prestosql.sql.tree.AstVisitor.process(AstVisitor.java:27)
    at io.prestosql.sql.analyzer.StatementAnalyzer$Visitor.process(StatementAnalyzer.java:339)
    at io.prestosql.sql.analyzer.StatementAnalyzer$Visitor.process(StatementAnalyzer.java:349)
    at io.prestosql.sql.analyzer.StatementAnalyzer$Visitor.visitQuery(StatementAnalyzer.java:1039)
    at io.prestosql.sql.analyzer.StatementAnalyzer$Visitor.visitQuery(StatementAnalyzer.java:322)
    at io.prestosql.sql.tree.Query.accept(Query.java:107)
    at io.prestosql.sql.tree.AstVisitor.process(AstVisitor.java:27)
    at io.prestosql.sql.analyzer.StatementAnalyzer$Visitor.process(StatementAnalyzer.java:339)
    at io.prestosql.sql.analyzer.StatementAnalyzer.analyze(StatementAnalyzer.java:308)
    at io.prestosql.sql.analyzer.Analyzer.analyze(Analyzer.java:83)
    at io.prestosql.sql.analyzer.Analyzer.analyze(Analyzer.java:75)
    at io.prestosql.sql.analyzer.QueryExplainer.analyze(QueryExplainer.java:115)
    at io.prestosql.sql.analyzer.QueryExplainer.getLogicalPlan(QueryExplainer.java:182)
    at io.prestosql.sql.analyzer.QueryExplainer.getDistributedPlan(QueryExplainer.java:202)
    at io.prestosql.sql.analyzer.QueryExplainer.getPlan(QueryExplainer.java:130)
    at io.prestosql.sql.rewrite.ExplainRewrite$Visitor.getQueryPlan(ExplainRewrite.java:137)
    at io.prestosql.sql.rewrite.ExplainRewrite$Visitor.visitExplain(ExplainRewrite.java:115)
    at io.prestosql.sql.rewrite.ExplainRewrite$Visitor.visitExplain(ExplainRewrite.java:65)
    at io.prestosql.sql.tree.Explain.accept(Explain.java:80)
    at io.prestosql.sql.tree.AstVisitor.process(AstVisitor.java:27)
    at io.prestosql.sql.rewrite.ExplainRewrite.rewrite(ExplainRewrite.java:62)
    at io.prestosql.sql.rewrite.StatementRewrite.rewrite(StatementRewrite.java:57)
    at io.prestosql.sql.analyzer.Analyzer.analyze(Analyzer.java:80)
    at io.prestosql.sql.analyzer.Analyzer.analyze(Analyzer.java:75)
    at io.prestosql.execution.SqlQueryExecution.analyze(SqlQueryExecution.java:257)
    at io.prestosql.execution.SqlQueryExecution.<init>(SqlQueryExecution.java:183)
    at io.prestosql.execution.SqlQueryExecution$SqlQueryExecutionFactory.createQueryExecution(SqlQueryExecution.java:759)
    at io.prestosql.dispatcher.LocalDispatchQueryFactory.lambda$createDispatchQuery$0(LocalDispatchQueryFactory.java:123)
    at io.prestosql.$gen.Presto_344_62_gf11ee6e_dirty____20201022_070346_2.call(Unknown Source)
    at com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:125)
    at com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:69)
    at com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:78)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
    at java.base/java.lang.Thread.run(Thread.java:834)
Caused by: com.linkedin.coral.hive.hive2rel.functions.UnknownSqlFunctionException: Unknown function name: smid_udf
    at com.linkedin.coral.hive.hive2rel.functions.HiveFunctionResolver.tryResolve(HiveFunctionResolver.java:119)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.visitFunctionInternal(ParseTreeBuilder.java:445)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.visitFunction(ParseTreeBuilder.java:436)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.visitFunction(ParseTreeBuilder.java:69)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visit(AbstractASTVisitor.java:121)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.lambda$visitChildren$0(AbstractASTVisitor.java:284)
    at java.base/java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:195)
    at java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1654)
    at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:484)
    at java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:474)
    at java.base/java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:913)
    at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
    at java.base/java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:578)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visitChildren(AbstractASTVisitor.java:285)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visitChildren(AbstractASTVisitor.java:279)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.visitSelectExpr(ParseTreeBuilder.java:451)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.visitSelectExpr(ParseTreeBuilder.java:69)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visit(AbstractASTVisitor.java:97)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.lambda$visitChildren$0(AbstractASTVisitor.java:284)
    at java.base/java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:195)
    at java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1654)
    at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:484)
    at java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:474)
    at java.base/java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:913)
    at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
    at java.base/java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:578)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visitChildren(AbstractASTVisitor.java:285)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visitChildren(AbstractASTVisitor.java:279)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.visitSelect(ParseTreeBuilder.java:473)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.visitSelect(ParseTreeBuilder.java:69)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visit(AbstractASTVisitor.java:94)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.lambda$visitChildren$0(AbstractASTVisitor.java:284)
    at java.base/java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:195)
    at java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1654)
    at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:484)
    at java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:474)
    at java.base/java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:913)
    at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
    at java.base/java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:578)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visitChildren(AbstractASTVisitor.java:285)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visitChildren(AbstractASTVisitor.java:279)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visitInsert(AbstractASTVisitor.java:521)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visit(AbstractASTVisitor.java:82)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.lambda$visitChildren$0(AbstractASTVisitor.java:284)
    at java.base/java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:195)
    at java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1654)
    at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:484)
    at java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:474)
    at java.base/java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:913)
    at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
    at java.base/java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:578)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visitChildren(AbstractASTVisitor.java:285)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visitChildren(AbstractASTVisitor.java:279)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.visitQueryNode(ParseTreeBuilder.java:601)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.visitQueryNode(ParseTreeBuilder.java:69)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visit(AbstractASTVisitor.java:64)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.lambda$visitChildren$0(AbstractASTVisitor.java:284)
    at java.base/java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:195)
    at java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1654)
    at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:484)
    at java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:474)
    at java.base/java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:913)
    at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
    at java.base/java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:578)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visitChildren(AbstractASTVisitor.java:285)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visitChildren(AbstractASTVisitor.java:279)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.visitSubquery(ParseTreeBuilder.java:549)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.visitSubquery(ParseTreeBuilder.java:69)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visit(AbstractASTVisitor.java:48)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.lambda$visitChildren$0(AbstractASTVisitor.java:284)
    at java.base/java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:195)
    at java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1654)
    at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:484)
    at java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:474)
    at java.base/java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:913)
    at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
    at java.base/java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:578)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visitChildren(AbstractASTVisitor.java:285)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visitChildren(AbstractASTVisitor.java:279)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.visitFrom(ParseTreeBuilder.java:560)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.visitFrom(ParseTreeBuilder.java:69)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visit(AbstractASTVisitor.java:57)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.lambda$visitChildren$0(AbstractASTVisitor.java:284)
    at java.base/java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:195)
    at java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1654)
    at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:484)
    at java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:474)
    at java.base/java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:913)
    at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
    at java.base/java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:578)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visitChildren(AbstractASTVisitor.java:285)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visitChildren(AbstractASTVisitor.java:279)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.visitQueryNode(ParseTreeBuilder.java:601)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.visitQueryNode(ParseTreeBuilder.java:69)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visit(AbstractASTVisitor.java:64)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.lambda$visitChildren$0(AbstractASTVisitor.java:284)
    at java.base/java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:195)
    at java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1654)
    at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:484)
    at java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:474)
    at java.base/java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:913)
    at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
    at java.base/java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:578)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visitChildren(AbstractASTVisitor.java:285)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visitChildren(AbstractASTVisitor.java:279)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.visitNil(ParseTreeBuilder.java:607)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.visitNil(ParseTreeBuilder.java:69)
    at com.linkedin.coral.hive.hive2rel.parsetree.AbstractASTVisitor.visit(AbstractASTVisitor.java:45)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.processAST(ParseTreeBuilder.java:161)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.process(ParseTreeBuilder.java:153)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.processViewOrTable(ParseTreeBuilder.java:121)
    at com.linkedin.coral.hive.hive2rel.parsetree.ParseTreeBuilder.processView(ParseTreeBuilder.java:136)
    at com.linkedin.coral.hive.hive2rel.HiveToRelConverter.convertView(HiveToRelConverter.java:89)
    at io.prestosql.plugin.hive.ViewReaderUtil$HiveViewReader.decodeViewData(ViewReaderUtil.java:138)
    ... 49 more

smid_udf is a custom Hive UDF.

wmoustafa commented 3 years ago

Supporting native Hive UDFs in Presto is not very straightforward as the UDF model is completely different. For example, Hive uses an initialize() function in GenericUDF to define type validation and inference. On the other hand, Presto uses type signatures. Since initialize() functions (used in Hive) are more expressive than type-signature-style type derivation system (used in Presto), it is hard to express the Hive UDF as a Presto UDF.

That said, at LinkedIn we use Transport to define portable user defined functions between Hive, Spark, and Presto. Once a UDF is written in this API, its equivalent Hive, Spark, and Presto versions are generated. Coral supports converting a Hive UDF written in Transport to its Presto equivalent transparently. Please see more details in this blogpost.

findepi commented 3 years ago

Supporting native Hive UDFs in Presto is not very straightforward as the UDF model is completely different. For example, Hive uses an initialize() function in GenericUDF to define type validation and inference.

Fortunately, we do not have to go as far. I think the goal is not to provide adapter interface allowing Hive UDF execution within Presto. Instead, it's to provide all and only the necessary conversions for view text. The input would be HQL view text (String) and the output is SQL view text, with Hive UDF function calls replaced with Presto function calls (or expressions, in case of no direct 1-1 function mapping).

This should be accessible to users via configuration or extension point.

Coral supports converting a Hive UDF written in Transport to its Presto equivalent transparently. Please see more details in this blogpost.

Using the example provided by @qianmoQ -- let's say I implemented smid_udf using Transport. What would be the next step to have it recognized by Coral (as integrated in Presto)?

qianmoQ commented 3 years ago

@findepi I also implemented smid_udf function in Presto, but Coral couldn't parse it

wmoustafa commented 3 years ago

Right now the mechanism is adding the mapping to Coral code. The current implementation should be easily evolvable to automatically map Transport UDFs since they follow a known pattern, and the inputs and outputs are the same.

If we want flexibility beyond changing the code, and beyond using Transport, I think a SQL API is needed at the Presto level to supply the mapping on the fly.

findepi commented 3 years ago

If we want flexibility beyond changing the code

we definitely cannot expect users to fork Coral and replace the version bundled with Presto with their own build. That would open all kinds of compatibility problems.

and beyond using Transport

@qianmoQ did you happen to implement your UDFs with Transport?

I think a SQL API is needed at the Presto level to supply the mapping on the fly.

Until functions are defined dynamically via SQL, we should not need to go that far. We may, however, need to provide a way to express those mappings for functions provided by custom plugins. Either the plugin could declare something, or there could be a mapping provided in the configuration somewhere.

cc @electrum

qianmoQ commented 3 years ago

If we want flexibility beyond changing the code

we definitely cannot expect users to fork Coral and replace the version bundled with Presto with their own build. That would open all kinds of compatibility problems.

and beyond using Transport

@qianmoQ did you happen to implement your UDFs with Transport?

I think a SQL API is needed at the Presto level to supply the mapping on the fly.

Until functions are defined dynamically via SQL, we should not need to go that far. We may, however, need to provide a way to express those mappings for functions provided by custom plugins. Either the plugin could declare something, or there could be a mapping provided in the configuration somewhere.

cc @electrum

Did you say Presto UDF or Hive UDF?

findepi commented 3 years ago

Did you say Presto UDF or Hive UDF?

Probably Presto, but I do not understand yet how a UDF implemented with Transport is recognized by Coral. I think it should not matter how you implemented your Hive UDF, since it's not loaded in Presto at runtime at all.

@wmoustafa can you clarify how Coral recognizes UDF-s implemented with Transport?

qianmoQ commented 3 years ago

@findepi

I implemented my custom UDF in Presto via a class that implements Plugin. The source code is as follows:

    // Example scalar UDF taking a VARCHAR argument and returning a nullable VARCHAR
    // (the body is elided in the original post).
    // NOTE(review): the name given to @ScalarFunction is "hello_scalar" while the Java method is
    // named smid_udf -- presumably the function is registered under the annotation value, in which
    // case a view calling smid_udf would not resolve to this function; confirm against the
    // Presto function-development documentation.
    @SqlNullable
    @ScalarFunction(value = "hello_scalar")
    @Description(value = "print hello scalar")
    @SqlType(StandardTypes.VARCHAR)
    public static Slice smid_udf(@SqlType(StandardTypes.VARCHAR) Slice string) {
        ...
    }
import com.facebook.presto.spi.Plugin;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

import java.io.FileInputStream;
import java.io.IOException;
import java.net.URL;
import java.util.List;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

public class FunctionLoadPlugin implements Plugin {

    private List<Class<?>> getFunctionClasses() throws IOException {
        List<Class<?>> classes = Lists.newArrayList();
        String classResource = this.getClass().getName().replace(".", "/") + ".class";
        String jarURLFile = Thread.currentThread().getContextClassLoader().getResource(classResource).getFile();
        int jarEnd = jarURLFile.indexOf('!');
        String jarLocation = jarURLFile.substring(0, jarEnd); // This is in URL format, convert once more to get actual file location
        jarLocation = new URL(jarLocation).getFile();

        ZipInputStream zip = new ZipInputStream(new FileInputStream(jarLocation));
        for (ZipEntry entry = zip.getNextEntry(); entry != null; entry = zip.getNextEntry()) {
            if (entry.getName().endsWith(".class") && !entry.isDirectory()) {
                String className = entry.getName().replace("/", "."); // This still has .class at the end
                className = className.substring(0, className.length() - 6); // remvove .class from end
                try {
                    classes.add(Class.forName(className));
                } catch (ClassNotFoundException e) {
                }
            }
        }
        return classes;
    }

    @Override
    public Set<Class<?>> getFunctions() {
        try {
            List<Class<?>> classes = getFunctionClasses();
            Set<Class<?>> set = Sets.newHashSet();
            for (Class<?> clazz : classes) {
                if (clazz.getName().startsWith("io.edurt.tutorial.presto.scalar.function")) {
                    set.add(clazz);
                }
            }
            return ImmutableSet.<Class<?>>builder().addAll(set).build();
        } catch (IOException e) {
            return ImmutableSet.of();
        }

    }
}
wmoustafa commented 3 years ago

@wmoustafa can you clarify how Coral recognizes UDF-s implemented with Transport?

Good question, @findepi. Right now, it does not. However, it has a way to do that for Hive Generic UDFs. See this class for how this is implemented. The same approach is easily extensible to Transport UDFs, with the additional advantage that UDFs are easily mappable to their Presto versions in the case of Transport due to the straightforward mapping.

qianmoQ commented 3 years ago

@findepi @wmoustafa What is the current progress?

findepi commented 3 years ago

@qianmoQ I am not aware of anyone working on this.