trinodb / trino

Official repository of Trino, the distributed SQL query engine for big data, formerly known as PrestoSQL (https://trino.io)
https://trino.io
Apache License 2.0
10.21k stars 2.94k forks source link

Hive connector can write numeric NaN and ±Infinity with JSON format, but cannot read the values back #20395

Open ebyhr opened 8 months ago

ebyhr commented 8 months ago
create table test_nan with (format = 'json') as select nan() a;
table test_nan;

Failed to read file at /var/folders/9s/_zwn4r_n2_9bp0krllp1pl3c0000gp/T/TrinoTest16541592175967501555/hive_data/tpch/test_nan/20240117_001008_00024_7aya8_e9ee5153-2b74-4446-a143-49470ee8dd3b.gz
io.trino.spi.TrinoException: Failed to read file at /var/folders/9s/_zwn4r_n2_9bp0krllp1pl3c0000gp/T/TrinoTest16541592175967501555/hive_data/tpch/test_nan/20240117_001008_00024_7aya8_e9ee5153-2b74-4446-a143-49470ee8dd3b.gz
    at io.trino.plugin.hive.line.LinePageSource.getNextPage(LinePageSource.java:75)
    at io.trino.plugin.hive.HivePageSource.getNextPage(HivePageSource.java:166)
    at io.trino.operator.TableScanOperator.getOutput(TableScanOperator.java:266)
    at io.trino.operator.Driver.processInternal(Driver.java:398)
    at io.trino.operator.Driver.lambda$process$8(Driver.java:301)
    at io.trino.operator.Driver.tryWithLock(Driver.java:704)
    at io.trino.operator.Driver.process(Driver.java:293)
    at io.trino.operator.Driver.processForDuration(Driver.java:264)
    at io.trino.execution.SqlTaskExecution$DriverSplitRunner.processFor(SqlTaskExecution.java:887)
    at io.trino.execution.executor.timesharing.PrioritizedSplitRunner.process(PrioritizedSplitRunner.java:187)
    at io.trino.execution.executor.timesharing.TimeSharingTaskExecutor$TaskRunner.run(TimeSharingTaskExecutor.java:565)
    at io.trino.$gen.Trino_testversion____20240117_000919_1.run(Unknown Source)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
    at java.base/java.lang.Thread.run(Thread.java:1583)
Caused by: com.fasterxml.jackson.core.JsonParseException: Current token (VALUE_STRING) not numeric, can not use numeric value accessors
 at [Source: (byte[])"{"a":"NaN"}"; line: 1, column: 7]
    at com.fasterxml.jackson.core.JsonParser._constructError(JsonParser.java:2477)
    at com.fasterxml.jackson.core.base.ParserMinimalBase._reportError(ParserMinimalBase.java:755)
    at com.fasterxml.jackson.core.base.ParserBase._parseNumericValue(ParserBase.java:936)
    at com.fasterxml.jackson.core.base.ParserBase.getDoubleValue(ParserBase.java:830)
    at io.trino.hive.formats.line.json.JsonDeserializer$DoubleDecoder.decodeValue(JsonDeserializer.java:403)
    at io.trino.hive.formats.line.json.JsonDeserializer$Decoder.decode(JsonDeserializer.java:243)
    at io.trino.hive.formats.line.json.JsonDeserializer$RowDecoder.decodeValue(JsonDeserializer.java:718)
    at io.trino.hive.formats.line.json.JsonDeserializer$RowDecoder.decode(JsonDeserializer.java:688)
    at io.trino.hive.formats.line.json.JsonDeserializer.deserialize(JsonDeserializer.java:157)
    at io.trino.plugin.hive.line.LinePageSource.getNextPage(LinePageSource.java:62)
create table test_inf with (format = 'json') as select infinity() a;
table test_inf;

Failed to read file at /var/folders/9s/_zwn4r_n2_9bp0krllp1pl3c0000gp/T/TrinoTest16541592175967501555/hive_data/tpch/test_inf/20240117_001337_00026_7aya8_7924f951-0c9a-4914-8512-ef47571a42f0.gz
io.trino.spi.TrinoException: Failed to read file at /var/folders/9s/_zwn4r_n2_9bp0krllp1pl3c0000gp/T/TrinoTest16541592175967501555/hive_data/tpch/test_inf/20240117_001337_00026_7aya8_7924f951-0c9a-4914-8512-ef47571a42f0.gz
    at io.trino.plugin.hive.line.LinePageSource.getNextPage(LinePageSource.java:75)
    at io.trino.plugin.hive.HivePageSource.getNextPage(HivePageSource.java:166)
    at io.trino.operator.TableScanOperator.getOutput(TableScanOperator.java:266)
    at io.trino.operator.Driver.processInternal(Driver.java:398)
    at io.trino.operator.Driver.lambda$process$8(Driver.java:301)
    at io.trino.operator.Driver.tryWithLock(Driver.java:704)
    at io.trino.operator.Driver.process(Driver.java:293)
    at io.trino.operator.Driver.processForDuration(Driver.java:264)
    at io.trino.execution.SqlTaskExecution$DriverSplitRunner.processFor(SqlTaskExecution.java:887)
    at io.trino.execution.executor.timesharing.PrioritizedSplitRunner.process(PrioritizedSplitRunner.java:187)
    at io.trino.execution.executor.timesharing.TimeSharingTaskExecutor$TaskRunner.run(TimeSharingTaskExecutor.java:565)
    at io.trino.$gen.Trino_testversion____20240117_000919_1.run(Unknown Source)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
    at java.base/java.lang.Thread.run(Thread.java:1583)
Caused by: com.fasterxml.jackson.core.JsonParseException: Current token (VALUE_STRING) not numeric, can not use numeric value accessors
 at [Source: (byte[])"{"a":"Infinity"}"; line: 1, column: 7]
    at com.fasterxml.jackson.core.JsonParser._constructError(JsonParser.java:2477)
    at com.fasterxml.jackson.core.base.ParserMinimalBase._reportError(ParserMinimalBase.java:755)
    at com.fasterxml.jackson.core.base.ParserBase._parseNumericValue(ParserBase.java:936)
    at com.fasterxml.jackson.core.base.ParserBase.getDoubleValue(ParserBase.java:830)
    at io.trino.hive.formats.line.json.JsonDeserializer$DoubleDecoder.decodeValue(JsonDeserializer.java:403)
    at io.trino.hive.formats.line.json.JsonDeserializer$Decoder.decode(JsonDeserializer.java:243)
    at io.trino.hive.formats.line.json.JsonDeserializer$RowDecoder.decodeValue(JsonDeserializer.java:718)
    at io.trino.hive.formats.line.json.JsonDeserializer$RowDecoder.decode(JsonDeserializer.java:688)
    at io.trino.hive.formats.line.json.JsonDeserializer.deserialize(JsonDeserializer.java:157)
    at io.trino.plugin.hive.line.LinePageSource.getNextPage(LinePageSource.java:62)
ebyhr commented 8 months ago

We should probably throw an exception when writing such values, rather than producing files that cannot be read back.

hashhar commented 8 months ago

Indeed, NaN and Infinity are not valid JSON number values; see https://www.json.org/json-en.html