Open PYTHONrohit7 opened 1 year ago
-- Create the class database, then make it the current database for the session.
create database hive_class_b1;
use hive_class_b1;
-- Managed table for department records stored as text, one row per line,
-- with columns separated by commas.
hive>create table department_data (
hive>     dept_id     int,
hive>     dept_name   string,
hive>     manager_id  int,
hive>     salary      int
hive> )
hive> row format delimited fields terminated by ',';
-- Show the column list, then the extended table metadata.
hive>desc department_data;
hive>desc formatted department_data;
-- LOCAL: the file is read from the client's local filesystem and added to the table
-- (no OVERWRITE, so existing rows are kept).
hive>load data local inpath 'file:///tmp/hive_class/depart_data.csv'
hive> into table department_data;
-- Print column names as a header line in query output from here on.
hive>set hive.cli.print.header=true;
-- The target table has to exist before data can be loaded into it. Clone the
-- definition of department_data (same columns, same comma-delimited text format).
hive>create table if not exists department_data_from_hdfs like department_data;
-- No LOCAL keyword: the path is resolved on HDFS, and every file under it is MOVED
-- (not copied) into the table's directory, leaving the source directory empty.
hive>load data inpath '/tmp/hive_data_class_2/' into table department_data_from_hdfs;
-- External table: Hive reads the comma-delimited files in place under the given
-- HDFS location instead of storing the data in its own warehouse directory.
hive>create external table department_data_external (
hive>     dept_id     int,
hive>     dept_name   string,
hive>     manager_id  int,
hive>     salary      int
hive> )
hive> row format delimited fields terminated by ','
hive> location '/tmp/hive_data_class_2/';
-- Employee table with an array column.
-- Fields are separated by ',' and the elements inside the skills field by ':'.
hive>create table employee
hive> (
hive> id int,
hive> name string,
hive> skills array<string>  -- array needs an element type; a bare "array" is rejected by Hive
hive> )
hive> row format delimited
hive> fields terminated by ','
hive> collection items terminated by ':';
-- Add the local array sample file to the employee table.
hive>load data local inpath 'file:///tmp/hive_class/array_data.csv'
hive> into table employee;
-- Array indexes start at 0, so skills[0] is the first skill listed for each employee.
hive>select
hive> id,
hive> name,
hive> skills[0] as prime_skill
hive> from employee;
-- Per employee: how many skills are listed, whether "HADOOP" is one of them,
-- and the skills array in sorted order.
hive>select id, name,
hive>        size(skills)                    as size_of_each_array,
hive>        array_contains(skills,"HADOOP") as knows_hadoop,
hive>        sort_array(skills)              as sorted_array
hive> from employee;
-- Compact one-line form of the array-function query (size, membership test, sorted copy).
-- The closing ';' is required: without it the CLI keeps reading and parses the next
-- statement as part of this one.
hive>select
hive> id, name, size(skills) as size_of_each_array, array_contains(skills,"HADOOP") as knows_hadoop, sort_array(skills) as sorted_array from employee;
-- Employee table with a map column.
-- Fields are split on ',', map entries on '|', and each entry's key from its value on ':'.
hive>create table employee_map_data (
hive>     id      int,
hive>     name    string,
hive>     details map<string,string>
hive> )
hive> row format delimited
hive>     fields terminated by ','
hive>     collection items terminated by '|'
hive>     map keys terminated by ':';
-- Add the local map sample file to employee_map_data.
hive> load data local inpath 'file:///tmp/hive_class/map_data.csv'
hive> into table employee_map_data;
-- Look up a single map entry by key; the "gender" value of each employee's details map.
hive>select id, name,
hive>        details["gender"] as employee_gender
hive> from employee_map_data;
-- Per employee: the raw details map, its number of entries, and its keys and
-- values returned as arrays.
hive>select id, name, details,
hive>        size(details)       as size_of_each_map,
hive>        map_keys(details)   as distinct_map_keys,
hive>        map_values(details) as distinct_map_values
hive> from employee_map_data;
https://www.kaggle.com/datasets/imdevskp/corona-virus-report
Notes on Hive (big data) - iNeuron