apache / incubator-gluten

Gluten is a middle layer responsible for offloading JVM-based SQL engines' execution to native engines.
https://gluten.apache.org/
Apache License 2.0
1.22k stars 439 forks source link

[CH] Nested column pruning doesn't work for Project(Filter(Generate)) in either Gluten or vanilla Spark #7868

Closed taiyang-li closed 2 weeks ago

taiyang-li commented 3 weeks ago

Description

-- Log optimizer plan changes at ERROR level so they are visible in the session output.
SET spark.sql.planChangeLog.level = error;
-- Disable Gluten so the statements below run on vanilla Spark.
SET spark.gluten.enabled = false;

-- Repro table stored as ORC: one top-level string column plus a wide struct
-- column. The query in this script reads only `country` and the
-- `event.event_info` map out of the struct.
CREATE TABLE aj (
    country STRING,
    event STRUCT<
        time: BIGINT,
        lng: BIGINT,
        lat: BIGINT,
        net: STRING,
        log_extra: MAP<STRING, STRING>,
        event_id: STRING,
        event_info: MAP<STRING, STRING>
    >
)
USING orc;

-- Seed two sample rows. The target columns are listed explicitly so the
-- values bind by name rather than by the table's column order.
INSERT INTO aj (country, event) VALUES
    (
        'USA',
        named_struct(
            'time', 1622547800,
            'lng', -122,
            'lat', 37,
            'net', 'wifi',
            'log_extra', map('key1', 'value1'),
            'event_id', 'event1',
            'event_info', map('tab_type', '5', 'action', '13')
        )
    ),
    (
        'Canada',
        named_struct(
            'time', 1622547801,
            'lng', -79,
            'lat', 43,
            'net', '4g',
            'log_extra', map('key2', 'value2'),
            'event_id', 'event2',
            'event_info', map('tab_type', '4', 'action', '12')
        )
    );

-- Print the extended plans for the query under investigation.
-- The inner query explodes `country` through a LATERAL VIEW, filters on
-- event.event_info['action'] and projects a CASE over
-- event.event_info['tab_type']; the outer query filters on the generated
-- `game_name` column. Only the `event_info` field of `event` is referenced.
-- The integer IN-list literals are compared against MAP<STRING, STRING>
-- values; they are kept exactly as written in the report.
EXPLAIN EXTENDED
SELECT *
FROM (
    SELECT
        game_name,
        CASE
            WHEN event.event_info['tab_type'] IN (5) THEN '1'
            ELSE '0'
        END AS entrance
    FROM aj
    LATERAL VIEW explode(split(country, ', ')) game_name AS game_name
    WHERE event.event_info['action'] IN (13)
)
WHERE game_name = 'xxx';