Closed devillove084 closed 3 months ago
This issue is part of #42 cc @Xuanwo
Thanks for driving this!
Great, I successfully set up the development environment for Paimon Java and its dependencies, and I reviewed the relevant code. I researched all of the DataType types and tried serializing each one to JSON. I also created a new unit test class that generates all of the JSON files. The relevant code is as follows. JsonUtil.java:
package org.apache.paimon.types;
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.core.JsonGenerator;
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.databind.SerializationFeature;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
/** Helper that writes the JSON form of a {@link DataType} to a file. */
public class JsonUtil {

    /** Shared mapper; only its factory is used, to create {@link JsonGenerator}s. */
    private static final ObjectMapper objectMapper = new ObjectMapper();

    static {
        objectMapper.configure(SerializationFeature.FAIL_ON_EMPTY_BEANS, false);
    }

    /**
     * Serializes {@code dataType} with {@code DataType#serializeJson} and stores the result in
     * {@code fileName}, replacing any existing content.
     *
     * <p>The file is always encoded as UTF-8 so the generated fixtures do not depend on the
     * platform default charset of the machine that runs the generator.
     *
     * @param dataType the type to serialize
     * @param fileName path of the file to write
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be opened or
     *     written
     */
    public static void writeDataTypeToJson(DataType dataType, String fileName) {
        // Both resources are managed by try-with-resources so the generator is flushed and
        // closed even when serializeJson fails part-way through.
        try (OutputStreamWriter writer =
                        new OutputStreamWriter(
                                new FileOutputStream(new File(fileName)), StandardCharsets.UTF_8);
                JsonGenerator generator = objectMapper.getFactory().createGenerator(writer)) {
            dataType.serializeJson(generator);
        } catch (IOException e) {
            throw new RuntimeException("Failed to write JSON to file: " + fileName, e);
        }
    }
}
DataTypesJsonTest.java:
package org.apache.paimon.types;
import org.junit.jupiter.api.Test;
import java.util.Arrays;
import java.io.File;
/**
 * Generates JSON fixture files for {@link DataType}s under {@code /tmp/fixtures}.
 *
 * <p>The tests make no assertions about the produced JSON; they only write one file per data type
 * so the output can be used as a reference elsewhere.
 */
public class DataTypesJsonTest {

    /** Directory that receives every generated fixture file. */
    private static final String FIXTURE_DIRECTORY = "/tmp/fixtures";

    /** Writes a single fixture for a row type that nests rows, maps, arrays and multisets. */
    @Test
    void testGenerateHighlyComplexNestedJsonFile() {
        RowType innerMostRowType1 =
                new RowType(
                        Arrays.asList(
                                new DataField(0, "inner1_boolean", new BooleanType()),
                                new DataField(1, "inner1_int", new IntType()),
                                new DataField(2, "inner1_varchar", new VarCharType(100))));

        RowType innerMostRowType2 =
                new RowType(
                        Arrays.asList(
                                new DataField(0, "inner2_char", new CharType(50)),
                                new DataField(1, "inner2_float", new FloatType()),
                                new DataField(2, "inner2_binary", new BinaryType(256))));

        RowType middleRowType1 =
                new RowType(
                        Arrays.asList(
                                new DataField(0, "middle1_decimal", new DecimalType(12, 3)),
                                new DataField(1, "middle1_inner_row1", innerMostRowType1),
                                new DataField(
                                        2,
                                        "middle1_array",
                                        new ArrayType(
                                                new MapType(new VarCharType(50), new IntType())))));

        RowType middleRowType2 =
                new RowType(
                        Arrays.asList(
                                new DataField(
                                        0,
                                        "middle2_multiset",
                                        new MultisetType(new TimestampType(6))),
                                new DataField(1, "middle2_inner_row2", innerMostRowType2),
                                new DataField(
                                        2,
                                        "middle2_map",
                                        new MapType(new CharType(10), innerMostRowType1))));

        RowType outerRowType =
                new RowType(
                        Arrays.asList(
                                new DataField(0, "outer_row1", middleRowType1),
                                new DataField(1, "outer_row2", middleRowType2),
                                new DataField(
                                        2,
                                        "outer_map",
                                        new MapType(new VarCharType(30), middleRowType1)),
                                new DataField(3, "outer_array", new ArrayType(middleRowType2)),
                                new DataField(
                                        4,
                                        "outer_multiset",
                                        new MultisetType(
                                                new RowType(
                                                        Arrays.asList(
                                                                new DataField(
                                                                        0,
                                                                        "deep_inner_decimal",
                                                                        new DecimalType(10, 2)),
                                                                new DataField(
                                                                        1,
                                                                        "deep_inner_varbinary",
                                                                        new VarBinaryType(
                                                                                128))))))));

        generateJsonForDataType(outerRowType, "highly_complex_nested_row_type.json");
    }

    /** Writes two fixtures per data type. */
    @Test
    void testGenerateJsonFiles() {
        // Each type is written twice: the default construction goes to "*_nullable.json", and the
        // construction with a leading `false` argument goes to the plain file name
        // (presumably the NOT NULL variant -- confirm against the DataType constructors).
        generateJsonForDataType(new BooleanType(), "boolean_type_nullable.json");
        generateJsonForDataType(new BooleanType(false), "boolean_type.json");
        generateJsonForDataType(new TinyIntType(), "tinyint_type_nullable.json");
        generateJsonForDataType(new TinyIntType(false), "tinyint_type.json");
        generateJsonForDataType(new SmallIntType(), "smallint_type_nullable.json");
        generateJsonForDataType(new SmallIntType(false), "smallint_type.json");
        generateJsonForDataType(new IntType(), "int_type_nullable.json");
        generateJsonForDataType(new IntType(false), "int_type.json");
        generateJsonForDataType(new BigIntType(), "bigint_type_nullable.json");
        generateJsonForDataType(new BigIntType(false), "bigint_type.json");
        generateJsonForDataType(new DecimalType(10, 2), "decimal_type_nullable.json");
        generateJsonForDataType(new DecimalType(false, 10, 2), "decimal_type.json");
        generateJsonForDataType(new DoubleType(), "double_type_nullable.json");
        generateJsonForDataType(new DoubleType(false), "double_type.json");
        generateJsonForDataType(new FloatType(), "float_type_nullable.json");
        generateJsonForDataType(new FloatType(false), "float_type.json");
        generateJsonForDataType(new BinaryType(22), "binary_type_nullable.json");
        generateJsonForDataType(new BinaryType(false, 22), "binary_type.json");
        generateJsonForDataType(new VarBinaryType(233), "varbinary_type_nullable.json");
        generateJsonForDataType(new VarBinaryType(false, 233), "varbinary_type.json");
        generateJsonForDataType(new CharType(33), "char_type_nullable.json");
        generateJsonForDataType(new CharType(false, 33), "char_type.json");
        generateJsonForDataType(new VarCharType(33), "varchar_type_nullable.json");
        generateJsonForDataType(new VarCharType(false, 33), "varchar_type.json");
        generateJsonForDataType(new DateType(), "date_type_nullable.json");
        generateJsonForDataType(new DateType(false), "date_type.json");
        generateJsonForDataType(
                new LocalZonedTimestampType(), "local_zoned_timestamp_type_nullable.json");
        generateJsonForDataType(
                new LocalZonedTimestampType(false, 3), "local_zoned_timestamp_type.json");
        generateJsonForDataType(new TimeType(), "time_type_nullable.json");
        generateJsonForDataType(new TimeType(false, 9), "time_type.json");
        generateJsonForDataType(new TimestampType(), "timestamp_type_nullable.json");
        generateJsonForDataType(new TimestampType(false, 6), "timestamp_type.json");
        generateJsonForDataType(new ArrayType(new IntType()), "array_type_nullable.json");
        generateJsonForDataType(new ArrayType(false, new IntType(false)), "array_type.json");
        generateJsonForDataType(
                new MapType(new VarCharType(20), new IntType()), "map_type_nullable.json");
        generateJsonForDataType(
                new MapType(false, new VarCharType(20), new IntType(false)), "map_type.json");
        generateJsonForDataType(new MultisetType(new IntType()), "multiset_type_nullable.json");
        generateJsonForDataType(
                new MultisetType(false, new IntType(false)), "multiset_type.json");
        generateJsonForDataType(
                new RowType(
                        Arrays.asList(
                                new DataField(0, "a", new IntType()),
                                new DataField(1, "b", new VarCharType(20)))),
                "row_type_nullable.json");
        generateJsonForDataType(
                new RowType(
                        false,
                        Arrays.asList(
                                new DataField(0, "a", new IntType(false)),
                                new DataField(1, "b", new VarCharType(false, 20)))),
                "row_type.json");
    }

    /**
     * Writes the JSON form of {@code dataType} to {@code fileName} inside the fixture directory,
     * creating that directory first if necessary, and prints the resulting path.
     *
     * @param dataType the type to serialize
     * @param fileName file name (without directory) of the fixture to write
     * @throws IllegalStateException if the fixture directory does not exist and cannot be created
     */
    private void generateJsonForDataType(DataType dataType, String fileName) {
        File directory = new File(FIXTURE_DIRECTORY);
        // mkdirs() also returns false when the directory was created concurrently, so re-check
        // before treating the result as a failure. isDirectory() (rather than exists()) also
        // catches the case where the path is occupied by a regular file.
        if (!directory.isDirectory() && !directory.mkdirs() && !directory.isDirectory()) {
            throw new IllegalStateException(
                    "Failed to create fixture directory: " + FIXTURE_DIRECTORY);
        }
        String filePath = FIXTURE_DIRECTORY + "/" + fileName;
        JsonUtil.writeDataTypeToJson(dataType, filePath);
        System.out.println(
                "Generated JSON for " + dataType.getClass().getSimpleName() + ": " + filePath);
    }
}
Perfect, thanks!
In the original Paimon implementation, as_sql_string is used for output and debugging, while JSON serialization is used to transmit the TableSchema format, so it is essential to distinguish between the two implementations. We therefore need correctly formatted data type JSON output as a reference.