Kyligence / calcite

A tailored Apache Calcite for Apache Kylin; more details at http://mail-archives.apache.org/mod_mbox/kylin-dev/201704.mbox/%3CCAF7etT=wEBPKm4C_6ffssQ0=kEhD=j1jz3O9DpjC+Zu9xWU=5A@mail.gmail.com%3E . Isn't AtopCalcite in Kylin enough? No: it depends on a Tomcat feature that is no longer supported in Tomcat 8.
Apache License 2.0
14 stars 53 forks source link

Introduce calcite 2902 #300

Open Mukvin opened 2 years ago

Mukvin commented 2 years ago

Calcite Upgrade Report


Benchmark

package org.apache.kylin.query.engine;

import org.apache.calcite.rel.RelRoot;
import org.apache.calcite.sql.parser.SqlParseException;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.NLocalFileMetadataTestCase;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.results.format.ResultFormatType;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

import java.util.concurrent.TimeUnit;

/**
 * JMH benchmark that times repeated planner optimization of one wide SSB join query.
 *
 * <p>Configuration (see the annotations below): average-time mode reported in
 * milliseconds, 1 warmup iteration, 30 measurement iterations, 8 concurrent threads,
 * a single fork with a fixed 4G heap.
 *
 * <p>One benchmark operation is: build a {@code QueryExec}, convert the SQL text into a
 * {@code RelRoot} once, then call {@code optimize} on that same {@code RelRoot}
 * {@value #OPTIMIZE_CALLS_PER_OP} times. The reported ms/op therefore covers the whole
 * batch (including the single parse/convert step), not one optimize call.
 *
 * <p>The state scope is {@code Scope.Benchmark}, so this instance is shared by all
 * benchmark threads. The {@code QueryExec} and {@code RelRoot} are deliberately kept as
 * per-invocation locals rather than fields so that threads never share them.
 * NOTE(review): whether {@code QueryExec} would tolerate sharing is not visible here.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@Warmup(iterations = 1)
@Measurement(iterations = 30)
@Threads(8)
@Fork(value = 1, jvmArgs = {"-Xms4G", "-Xmx4G"})
@State(Scope.Benchmark)
public class QueryOptimizerBenchmark {

    /** Project name handed to {@code QueryExec}. */
    private static final String PROJECT = "demo";

    /**
     * Number of {@code optimize} calls performed per benchmark operation.
     * Kept at 3001 (the original loop was {@code i <= 3000}) so results stay comparable
     * with previously published numbers.
     */
    private static final int OPTIMIZE_CALLS_PER_OP = 3001;

    /**
     * The query under test: selects every listed column of the SSB fact table
     * left-joined to its four dimension tables. Column spellings such as
     * {@code LO_SHIPPRIOTITY} are reproduced exactly as the query was written.
     */
    private static final String WIDE_JOIN_SQL = "SELECT \n" +
            "P_LINEORDER.LO_SHIPMODE as P_LINEORDER_LO_SHIPMODE,\n" +
            "P_LINEORDER.LO_LINENUMBER as P_LINEORDER_LO_LINENUMBER,\n" +
            "P_LINEORDER.LO_ORDTOTALPRICE as P_LINEORDER_LO_ORDTOTALPRICE,\n" +
            "P_LINEORDER.LO_SUPPLYCOST as P_LINEORDER_LO_SUPPLYCOST,\n" +
            "P_LINEORDER.LO_SUPPKEY as P_LINEORDER_LO_SUPPKEY,\n" +
            "P_LINEORDER.LO_QUANTITY as P_LINEORDER_LO_QUANTITY,\n" +
            "P_LINEORDER.LO_PARTKEY as P_LINEORDER_LO_PARTKEY,\n" +
            "P_LINEORDER.LO_ORDERKEY as P_LINEORDER_LO_ORDERKEY,\n" +
            "P_LINEORDER.LO_CUSTKEY as P_LINEORDER_LO_CUSTKEY,\n" +
            "P_LINEORDER.LO_SHIPPRIOTITY as P_LINEORDER_LO_SHIPPRIOTITY,\n" +
            "P_LINEORDER.LO_DISCOUNT as P_LINEORDER_LO_DISCOUNT,\n" +
            "P_LINEORDER.LO_ORDERPRIOTITY as P_LINEORDER_LO_ORDERPRIOTITY,\n" +
            "P_LINEORDER.LO_ORDERDATE as P_LINEORDER_LO_ORDERDATE,\n" +
            "P_LINEORDER.LO_REVENUE as P_LINEORDER_LO_REVENUE,\n" +
            "P_LINEORDER.V_REVENUE as P_LINEORDER_V_REVENUE,\n" +
            "P_LINEORDER.LO_COMMITDATE as P_LINEORDER_LO_COMMITDATE,\n" +
            "P_LINEORDER.LO_EXTENDEDPRICE as P_LINEORDER_LO_EXTENDEDPRICE,\n" +
            "P_LINEORDER.LO_TAX as P_LINEORDER_LO_TAX,\n" +
            "DATES.D_WEEKNUMINYEAR as DATES_D_WEEKNUMINYEAR,\n" +
            "DATES.D_LASTDAYINWEEKFL as DATES_D_LASTDAYINWEEKFL,\n" +
            "DATES.D_LASTDAYINMONTHFL as DATES_D_LASTDAYINMONTHFL,\n" +
            "DATES.D_DAYOFWEEK as DATES_D_DAYOFWEEK,\n" +
            "DATES.D_MONTHNUMINYEAR as DATES_D_MONTHNUMINYEAR,\n" +
            "DATES.D_YEARMONTHNUM as DATES_D_YEARMONTHNUM,\n" +
            "DATES.D_YEARMONTH as DATES_D_YEARMONTH,\n" +
            "DATES.D_DAYNUMINMONTH as DATES_D_DAYNUMINMONTH,\n" +
            "DATES.D_SELLINGSEASON as DATES_D_SELLINGSEASON,\n" +
            "DATES.D_WEEKDAYFL as DATES_D_WEEKDAYFL,\n" +
            "DATES.D_YEAR as DATES_D_YEAR,\n" +
            "DATES.D_HOLIDAYFL as DATES_D_HOLIDAYFL,\n" +
            "DATES.D_DAYNUMINWEEK as DATES_D_DAYNUMINWEEK,\n" +
            "DATES.D_DAYNUMINYEAR as DATES_D_DAYNUMINYEAR,\n" +
            "DATES.D_DATE as DATES_D_DATE,\n" +
            "DATES.D_MONTH as DATES_D_MONTH,\n" +
            "DATES.D_DATEKEY as DATES_D_DATEKEY,\n" +
            "CUSTOMER.C_ADDRESS as CUSTOMER_C_ADDRESS,\n" +
            "CUSTOMER.C_NATION as CUSTOMER_C_NATION,\n" +
            "CUSTOMER.C_CITY as CUSTOMER_C_CITY,\n" +
            "CUSTOMER.C_PHONE as CUSTOMER_C_PHONE,\n" +
            "CUSTOMER.C_REGION as CUSTOMER_C_REGION,\n" +
            "CUSTOMER.C_NAME as CUSTOMER_C_NAME,\n" +
            "CUSTOMER.C_MKTSEGMENT as CUSTOMER_C_MKTSEGMENT,\n" +
            "CUSTOMER.C_CUSTKEY as CUSTOMER_C_CUSTKEY,\n" +
            "PART.P_PARTKEY as PART_P_PARTKEY,\n" +
            "PART.P_CONTAINER as PART_P_CONTAINER,\n" +
            "PART.P_SIZE as PART_P_SIZE,\n" +
            "PART.P_NAME as PART_P_NAME,\n" +
            "PART.P_CATEGORY as PART_P_CATEGORY,\n" +
            "PART.P_TYPE as PART_P_TYPE,\n" +
            "PART.P_MFGR as PART_P_MFGR,\n" +
            "PART.P_BRAND as PART_P_BRAND,\n" +
            "PART.P_COLOR as PART_P_COLOR,\n" +
            "SUPPLIER.S_ADDRESS as SUPPLIER_S_ADDRESS,\n" +
            "SUPPLIER.S_NAME as SUPPLIER_S_NAME,\n" +
            "SUPPLIER.S_NATION as SUPPLIER_S_NATION,\n" +
            "SUPPLIER.S_SUPPKEY as SUPPLIER_S_SUPPKEY,\n" +
            "SUPPLIER.S_REGION as SUPPLIER_S_REGION,\n" +
            "SUPPLIER.S_PHONE as SUPPLIER_S_PHONE,\n" +
            "SUPPLIER.S_CITY as SUPPLIER_S_CITY\n" +
            "FROM \n" +
            "SSB.P_LINEORDER as P_LINEORDER \n" +
            "LEFT JOIN SSB.DATES as DATES\n" +
            "ON P_LINEORDER.LO_ORDERDATE=DATES.D_DATEKEY\n" +
            "LEFT JOIN SSB.CUSTOMER as CUSTOMER\n" +
            "ON P_LINEORDER.LO_CUSTKEY=CUSTOMER.C_CUSTKEY\n" +
            "LEFT JOIN SSB.PART as PART\n" +
            "ON P_LINEORDER.LO_PARTKEY=PART.P_PARTKEY\n" +
            "LEFT JOIN SSB.SUPPLIER as SUPPLIER\n" +
            "ON P_LINEORDER.LO_SUPPKEY=SUPPLIER.S_SUPPKEY";

    /**
     * Prepares the environment before measurement: creates the local test metadata and
     * sets {@code kylin.query.dataframe-cache-enabled} to {@code "false"} on the
     * environment's {@code KylinConfig}.
     *
     * @throws Exception if creating the test metadata fails
     */
    @Setup
    public void setUp() throws Exception {
        NLocalFileMetadataTestCase metadata = new NLocalFileMetadataTestCase();
        metadata.createTestMetadata();
        KylinConfig.getInstanceFromEnv().setProperty("kylin.query.dataframe-cache-enabled", "false");
    }

    /**
     * Benchmark body: converts {@link #WIDE_JOIN_SQL} to a relational tree once, then
     * optimizes that tree {@value #OPTIMIZE_CALLS_PER_OP} times.
     *
     * @throws SqlParseException if the SQL text cannot be parsed
     */
    @Benchmark
    public void optimizeRel() throws SqlParseException {
        // Per-invocation instances: this benchmark object is shared across threads.
        QueryExec queryExec = new QueryExec(PROJECT, KylinConfig.getInstanceFromEnv());
        RelRoot relRoot = queryExec.sqlConverter.convertSqlToRelNode(WIDE_JOIN_SQL);
        for (int call = 0; call < OPTIMIZE_CALLS_PER_OP; call++) {
            queryExec.optimize(relRoot);
        }
    }

    /**
     * Runs this benchmark through the JMH runner and emits results in JSON format.
     *
     * @param args unused
     * @throws Exception if the JMH run fails
     */
    public static void main(String[] args) throws Exception {
        Options opts = new OptionsBuilder()
                .include(QueryOptimizerBenchmark.class.getSimpleName())
                .resultFormat(ResultFormatType.JSON)
                .build();
        new Runner(opts).run();
    }
}

Before(1.116.0-kylin-4.x-r021)

# Warmup Iteration   1: 4804.941 ±(99.9%) 1623.696 ms/op
Iteration   1: 5926.838 ±(99.9%) 83.002 ms/op
Iteration   2: 6669.888 ±(99.9%) 302.045 ms/op
Iteration   3: 4533.345 ±(99.9%) 37.553 ms/op
Iteration   4: 4154.668 ±(99.9%) 53.689 ms/op
Iteration   5: 3836.025 ±(99.9%) 78.322 ms/op
Iteration   6: 3827.318 ±(99.9%) 47.342 ms/op
Iteration   7: 3966.734 ±(99.9%) 110.246 ms/op
Iteration   8: 3528.319 ±(99.9%) 36.724 ms/op
Iteration   9: 4548.459 ±(99.9%) 236.878 ms/op
Iteration  10: 3564.822 ±(99.9%) 130.477 ms/op
Iteration  11: 3532.063 ±(99.9%) 51.543 ms/op
Iteration  12: 3641.469 ±(99.9%) 69.136 ms/op
Iteration  13: 4020.822 ±(99.9%) 56.725 ms/op
Iteration  14: 3670.644 ±(99.9%) 53.022 ms/op
Iteration  15: 3644.789 ±(99.9%) 76.941 ms/op
Iteration  16: 3654.671 ±(99.9%) 87.807 ms/op
Iteration  17: 3909.223 ±(99.9%) 81.267 ms/op
Iteration  18: 3473.783 ±(99.9%) 51.718 ms/op
Iteration  19: 5288.901 ±(99.9%) 345.911 ms/op
Iteration  20: 3721.911 ±(99.9%) 33.372 ms/op
Iteration  21: 3661.732 ±(99.9%) 111.323 ms/op
Iteration  22: 3655.741 ±(99.9%) 39.567 ms/op
Iteration  23: 4405.195 ±(99.9%) 168.399 ms/op
Iteration  24: 3867.189 ±(99.9%) 45.348 ms/op
Iteration  25: 3907.763 ±(99.9%) 60.003 ms/op
Iteration  26: 4019.724 ±(99.9%) 97.995 ms/op
Iteration  27: 4609.844 ±(99.9%) 104.770 ms/op
Iteration  28: 4496.068 ±(99.9%) 148.081 ms/op
Iteration  29: 3811.977 ±(99.9%) 134.557 ms/op
Iteration  30: 3948.184 ±(99.9%) 96.599 ms/op

Result "org.apache.kylin.query.engine.QueryOptimizerBenchmark.optimizeRel":
  4116.604 ±(99.9%) 485.558 ms/op [Average]
  (min, avg, max) = (3473.783, 4116.604, 6669.888), stdev = 726.761
  CI (99.9%): [3631.045, 4602.162] (assumes normal distribution)

# Run complete. Total time: 00:08:01

REMEMBER: The numbers below are just data. To gain reusable insights, you need to follow up on
why the numbers are the way they are. Use profilers (see -prof, -lprof), design factorial
experiments, perform baseline and negative tests that provide experimental control, make sure
the benchmarking environment is safe on JVM/OS/HW level, ask for reviews from the domain experts.
Do not assume the numbers tell you what you want them to tell.

Benchmark                            Mode  Cnt     Score     Error  Units
QueryOptimizerBenchmark.optimizeRel  avgt   30  4116.604 ± 485.558  ms/op

After(1.116.0-kylin-4.x-r024)

# Warmup Iteration   1: 3512.040 ±(99.9%) 1539.879 ms/op
Iteration   1: 3209.740 ±(99.9%) 41.534 ms/op
Iteration   2: 2883.053 ±(99.9%) 65.956 ms/op
Iteration   3: 2889.543 ±(99.9%) 43.021 ms/op
Iteration   4: 2687.193 ±(99.9%) 65.863 ms/op
Iteration   5: 2775.590 ±(99.9%) 47.606 ms/op
Iteration   6: 2722.390 ±(99.9%) 63.297 ms/op
Iteration   7: 2715.576 ±(99.9%) 72.264 ms/op
Iteration   8: 2714.370 ±(99.9%) 64.343 ms/op
Iteration   9: 2778.215 ±(99.9%) 61.565 ms/op
Iteration  10: 2774.280 ±(99.9%) 53.732 ms/op
Iteration  11: 2814.291 ±(99.9%) 51.687 ms/op
Iteration  12: 2820.700 ±(99.9%) 32.117 ms/op
Iteration  13: 2808.322 ±(99.9%) 43.454 ms/op
Iteration  14: 2770.112 ±(99.9%) 25.359 ms/op
Iteration  15: 2816.414 ±(99.9%) 36.400 ms/op
Iteration  16: 2743.821 ±(99.9%) 31.754 ms/op
Iteration  17: 2765.046 ±(99.9%) 67.956 ms/op
Iteration  18: 2762.065 ±(99.9%) 41.252 ms/op
Iteration  19: 2792.257 ±(99.9%) 49.531 ms/op
Iteration  20: 2751.313 ±(99.9%) 35.463 ms/op
Iteration  21: 2737.771 ±(99.9%) 51.313 ms/op
Iteration  22: 2924.571 ±(99.9%) 37.405 ms/op
Iteration  23: 2834.411 ±(99.9%) 41.520 ms/op
Iteration  24: 2740.439 ±(99.9%) 31.275 ms/op
Iteration  25: 2728.094 ±(99.9%) 48.697 ms/op
Iteration  26: 2733.819 ±(99.9%) 35.052 ms/op
Iteration  27: 2748.317 ±(99.9%) 74.314 ms/op
Iteration  28: 2722.741 ±(99.9%) 41.552 ms/op
Iteration  29: 2896.345 ±(99.9%) 53.009 ms/op
Iteration  30: 3115.371 ±(99.9%) 55.581 ms/op

Result "org.apache.kylin.query.engine.QueryOptimizerBenchmark.optimizeRel":
  2805.872 ±(99.9%) 76.265 ms/op [Average]
  (min, avg, max) = (2687.193, 2805.872, 3209.740), stdev = 114.150
  CI (99.9%): [2729.607, 2882.137] (assumes normal distribution)

# Run complete. Total time: 00:07:09

REMEMBER: The numbers below are just data. To gain reusable insights, you need to follow up on
why the numbers are the way they are. Use profilers (see -prof, -lprof), design factorial
experiments, perform baseline and negative tests that provide experimental control, make sure
the benchmarking environment is safe on JVM/OS/HW level, ask for reviews from the domain experts.
Do not assume the numbers tell you what you want them to tell.

Benchmark                            Mode  Cnt     Score    Error  Units
QueryOptimizerBenchmark.optimizeRel  avgt   30  2805.872 ± 76.265  ms/op

Conclusion

>>> before=4116.604
>>> after=2805.872
>>> improve=(before - after)/before
>>> improve
0.3184012841652975

Improvement: 31.84%