apache / paimon-rust

Apache Paimon Rust The rust implementation of Apache Paimon.
https://paimon.apache.org/
Apache License 2.0
100 stars 31 forks source link

Implement Data Type test fixtures #46

Closed devillove084 closed 3 months ago

devillove084 commented 3 months ago

In the original Java implementation of Paimon, `as_sql_string` is used for display and debugging output, while JSON serialization is used to transmit the TableSchema format, so it is essential to distinguish between the two. Therefore, we need the correct JSON output for every data type as reference fixtures.

devillove084 commented 3 months ago

This issue is part of #42 cc @Xuanwo

Xuanwo commented 3 months ago

Thanks for driving this!

devillove084 commented 3 months ago

Great, I successfully set up the development environment for Paimon Java and its dependencies, and I reviewed the relevant code. I went through all of the DataType types and serialized each of them to JSON. I also created a new unit test class to generate all the JSON files. The relevant code is as follows: JsonUtil.java:

package org.apache.paimon.types;

import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.core.JsonGenerator;
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.databind.SerializationFeature;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;

/**
 * Helper for dumping the JSON form of a {@link DataType} to a file, used to produce
 * reference fixtures.
 */
public class JsonUtil {
    /** Shared mapper; only its {@code JsonFactory} is used to create generators. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    static {
        // Do not fail when asked to serialize a bean that exposes no properties.
        OBJECT_MAPPER.configure(SerializationFeature.FAIL_ON_EMPTY_BEANS, false);
    }

    /**
     * Serializes {@code dataType} via {@link DataType#serializeJson} and writes the result to
     * {@code fileName}, creating the file or truncating an existing one.
     *
     * <p>The output is always encoded as UTF-8 so that the generated fixtures do not depend on
     * the platform default charset of the machine that produced them.
     *
     * @param dataType the type whose JSON representation should be written
     * @param fileName path of the file to write
     * @throws RuntimeException wrapping the underlying {@link IOException} if the file cannot
     *     be opened or written
     */
    public static void writeDataTypeToJson(DataType dataType, String fileName) {
        // Both resources are managed by try-with-resources: the generator is closed first
        // (flushing its buffered output), then the writer, even if serialization throws.
        try (Writer writer =
                        Files.newBufferedWriter(
                                new File(fileName).toPath(), StandardCharsets.UTF_8);
                JsonGenerator generator = OBJECT_MAPPER.getFactory().createGenerator(writer)) {
            dataType.serializeJson(generator);
        } catch (IOException e) {
            throw new RuntimeException("Failed to write JSON to file: " + fileName, e);
        }
    }
}

DataTypesJsonTest.java:

package org.apache.paimon.types;

import org.junit.jupiter.api.Test;

import java.util.Arrays;
import java.io.File;

/**
 * Generates JSON fixture files for Paimon data types.
 *
 * <p>These tests make no assertions about the JSON content; each one writes files into
 * {@link #FIXTURE_DIRECTORY} through {@code JsonUtil.writeDataTypeToJson} so they can be used
 * as reference output elsewhere.
 */
public class DataTypesJsonTest {

    /** Directory that receives every generated fixture file. */
    private static final String FIXTURE_DIRECTORY = "/tmp/fixtures";

    /** Writes one fixture for a row type that nests rows, maps, arrays and multisets. */
    @Test
    void testGenerateHighlyComplexNestedJsonFile() {
        RowType innerMostRowType1 = new RowType(Arrays.asList(
                new DataField(0, "inner1_boolean", new BooleanType()),
                new DataField(1, "inner1_int", new IntType()),
                new DataField(2, "inner1_varchar", new VarCharType(100))
        ));

        RowType innerMostRowType2 = new RowType(Arrays.asList(
                new DataField(0, "inner2_char", new CharType(50)),
                new DataField(1, "inner2_float", new FloatType()),
                new DataField(2, "inner2_binary", new BinaryType(256))
        ));

        RowType middleRowType1 = new RowType(Arrays.asList(
                new DataField(0, "middle1_decimal", new DecimalType(12, 3)),
                new DataField(1, "middle1_inner_row1", innerMostRowType1),
                new DataField(2, "middle1_array", new ArrayType(new MapType(new VarCharType(50), new IntType())))
        ));

        RowType middleRowType2 = new RowType(Arrays.asList(
                new DataField(0, "middle2_multiset", new MultisetType(new TimestampType(6))),
                new DataField(1, "middle2_inner_row2", innerMostRowType2),
                new DataField(2, "middle2_map", new MapType(new CharType(10), innerMostRowType1))
        ));

        RowType outerRowType = new RowType(Arrays.asList(
                new DataField(0, "outer_row1", middleRowType1),
                new DataField(1, "outer_row2", middleRowType2),
                new DataField(2, "outer_map", new MapType(new VarCharType(30), middleRowType1)),
                new DataField(3, "outer_array", new ArrayType(middleRowType2)),
                new DataField(4, "outer_multiset", new MultisetType(new RowType(Arrays.asList(
                        new DataField(0, "deep_inner_decimal", new DecimalType(10, 2)),
                        new DataField(1, "deep_inner_varbinary", new VarBinaryType(128))
                ))))
        ));

        generateJsonForDataType(outerRowType, "highly_complex_nested_row_type.json");
    }

    /**
     * Writes two fixtures per data type: one built with the type's default constructor
     * ({@code *_nullable.json}) and one built with an explicit {@code false} flag.
     */
    @Test
    void testGenerateJsonFiles() {
        // For each type: the "_nullable" file uses the default constructor, the other file
        // passes false as the first constructor argument (presumably isNullable - confirm
        // against the DataType constructors).
        generateJsonForDataType(new BooleanType(), "boolean_type_nullable.json");
        generateJsonForDataType(new BooleanType(false), "boolean_type.json");

        generateJsonForDataType(new TinyIntType(), "tinyint_type_nullable.json");
        generateJsonForDataType(new TinyIntType(false), "tinyint_type.json");

        generateJsonForDataType(new SmallIntType(), "smallint_type_nullable.json");
        generateJsonForDataType(new SmallIntType(false), "smallint_type.json");

        generateJsonForDataType(new IntType(), "int_type_nullable.json");
        generateJsonForDataType(new IntType(false), "int_type.json");

        generateJsonForDataType(new BigIntType(), "bigint_type_nullable.json");
        generateJsonForDataType(new BigIntType(false), "bigint_type.json");

        generateJsonForDataType(new DecimalType(10, 2), "decimal_type_nullable.json");
        generateJsonForDataType(new DecimalType(false, 10, 2), "decimal_type.json");

        generateJsonForDataType(new DoubleType(), "double_type_nullable.json");
        generateJsonForDataType(new DoubleType(false), "double_type.json");

        generateJsonForDataType(new FloatType(), "float_type_nullable.json");
        generateJsonForDataType(new FloatType(false), "float_type.json");

        generateJsonForDataType(new BinaryType(22), "binary_type_nullable.json");
        generateJsonForDataType(new BinaryType(false, 22), "binary_type.json");

        generateJsonForDataType(new VarBinaryType(233), "varbinary_type_nullable.json");
        generateJsonForDataType(new VarBinaryType(false, 233), "varbinary_type.json");

        generateJsonForDataType(new CharType(33), "char_type_nullable.json");
        generateJsonForDataType(new CharType(false, 33), "char_type.json");

        generateJsonForDataType(new VarCharType(33), "varchar_type_nullable.json");
        generateJsonForDataType(new VarCharType(false, 33), "varchar_type.json");

        generateJsonForDataType(new DateType(), "date_type_nullable.json");
        generateJsonForDataType(new DateType(false), "date_type.json");

        generateJsonForDataType(new LocalZonedTimestampType(), "local_zoned_timestamp_type_nullable.json");
        generateJsonForDataType(new LocalZonedTimestampType(false, 3), "local_zoned_timestamp_type.json");

        generateJsonForDataType(new TimeType(), "time_type_nullable.json");
        generateJsonForDataType(new TimeType(false, 9), "time_type.json");

        generateJsonForDataType(new TimestampType(), "timestamp_type_nullable.json");
        generateJsonForDataType(new TimestampType(false, 6), "timestamp_type.json");

        generateJsonForDataType(new ArrayType(new IntType()), "array_type_nullable.json");
        generateJsonForDataType(new ArrayType(false, new IntType(false)), "array_type.json");

        generateJsonForDataType(new MapType(new VarCharType(20), new IntType()), "map_type_nullable.json");
        generateJsonForDataType(new MapType(false, new VarCharType(20), new IntType(false)), "map_type.json");

        generateJsonForDataType(new MultisetType(new IntType()), "multiset_type_nullable.json");
        generateJsonForDataType(new MultisetType(false, new IntType(false)), "multiset_type.json");

        generateJsonForDataType(new RowType(Arrays.asList(new DataField(0, "a", new IntType()), new DataField(1, "b", new VarCharType(20)))), "row_type_nullable.json");
        generateJsonForDataType(new RowType(false, Arrays.asList(new DataField(0, "a", new IntType(false)), new DataField(1, "b", new VarCharType(false, 20)))), "row_type.json");

    }

    /**
     * Writes the JSON form of {@code dataType} to {@code fileName} inside
     * {@link #FIXTURE_DIRECTORY}, creating the directory first if needed, and prints the
     * resulting path to standard output.
     *
     * @param dataType the type to serialize
     * @param fileName file name (without directory) of the fixture to write
     * @throws IllegalStateException if the fixture directory does not exist and cannot be
     *     created, or the path exists but is not a directory
     */
    private void generateJsonForDataType(DataType dataType, String fileName) {
        File directory = new File(FIXTURE_DIRECTORY);
        // mkdirs() returns false both on failure and when another process created the
        // directory in the meantime, so re-check isDirectory() before giving up.
        if (!directory.isDirectory() && !directory.mkdirs() && !directory.isDirectory()) {
            throw new IllegalStateException(
                    "Failed to create fixture directory: " + FIXTURE_DIRECTORY);
        }
        String filePath = FIXTURE_DIRECTORY + "/" + fileName;
        JsonUtil.writeDataTypeToJson(dataType, filePath);
        System.out.println("Generated JSON for " + dataType.getClass().getSimpleName() + ": " + filePath);
    }
}
Xuanwo commented 3 months ago

Perfect, thanks!