-- Preview of claims whose blind_make is 'AR': row_id and name are read from
-- the CSV file, vehicle and model_year from the Parquet file, matched on
-- row_id. The LEFT JOIN keeps CSV rows that have no Parquet match.
-- There is no ORDER BY, so which five rows are returned is not guaranteed.
SELECT
    csv.row_id,
    csv.name,
    parquet.vehicle,
    parquet.model_year
FROM dfs.tmp.`D:/data/other/claims.csv` AS csv
LEFT JOIN dfs.tmp.`D:/data/other/claims.parquet` AS parquet
    ON csv.row_id = parquet.row_id
WHERE csv.blind_make = 'AR'
LIMIT 5;
-- Single-line form of the claims CSV/Parquet join (handy for pasting into a shell or an R string).
-- 'AR' is a string literal and therefore takes single quotes; double quotes are not string delimiters in Drill.
select csv.row_id, csv.name, parquet.vehicle, parquet.model_year from dfs.tmp.`D:/data/other/claims.csv` as csv left join dfs.tmp.`D:/data/other/claims.parquet` as parquet on csv.row_id = parquet.row_id where csv.blind_make = 'AR' limit 5;
library(sergeant)
# Create the two Drill handles used below. "localhost" fits a standalone Drill
# running on this machine; otherwise pass the host name or IP of the Drill server.
ds <- src_drill("localhost")
ds
dc <- drill_connection()
dc

# Ask the server about itself through the `dc` connection; each result is
# printed at top level.
drill_status(dc)
drill_version(dc)
drill_metrics(dc)
drill_options(dc)
drill_stats(dc)
drill_storage(dc)
drill_threads(dc)

# List the files visible in the dfs.tmp workspace, then every available schema.
drill_show_files(dc, "dfs.tmp")
drill_show_schemas(dc)
library(tidyverse)
#ds %>% drill_version()
# See the available methods (the SQL translations defined for the Drill connection):
# Print the SQL translation environment of a freshly created src_drill() connection.
sql_translate_env(src_drill()$con)

# Fetch the first five rows of the flight-delay sample, once from the CSV file
# and once from the Parquet file, using the `dc` connection.
drill_query(dc, "SELECT * FROM dfs.`C:/data/flight_delay/Flight_Delays_Sample.csv` limit 5")
drill_query(dc, "SELECT * FROM dfs.`C:/data/flight_delay/Flight_Delays_Sample.parquet` limit 5")
Apache Drill & Parquet
D:/data/other/claims.csv
limit 5;D:/data/other/claims.parquet
AS SELECT * FROM dfs.csv.D:/data/other/claims.csv
;D:/data/other/claims.parquet
AS SELECT * FROM dfs.csv.D:/data/other/claims.csv
;D:/data/other/claims.parquet
limit 5;current
= true;D:/data/other/claims.csv
limit 5;
title: "R Notebook"
output: html_notebook
editor_options:
  chunk_output_type: console
see available methods