Download and unpack the Sqoop tarball, then move it to /opt:
wget http://labfile.oss.aliyuncs.com/courses/575/sqoop-1.4.6.bin__hadoop-2.0.4-alpha.tar.gz
tar zxvf sqoop-1.4.6.bin__hadoop-2.0.4-alpha.tar.gz
sudo mv sqoop-1.4.6.bin__hadoop-2.0.4-alpha /opt
Edit /home/hadoop/.bashrc, add the following lines, then apply them with source /home/hadoop/.bashrc:
export SQOOP_HOME=/opt/sqoop-1.4.6.bin__hadoop-2.0.4-alpha
export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/hadoop-2.4.1/bin:/opt/hadoop-2.4.1/sbin:/opt/hbase-1.1.5/bin:/opt/apache-hive-2.0.0-bin/bin:$SQOOP_HOME/bin:$PATH
Edit the sqoop-env.sh configuration file:
export HADOOP_COMMON_HOME=/opt/hadoop-2.4.1
export HADOOP_MAPRED_HOME=/opt/hadoop-2.4.1
export HBASE_HOME=/opt/hbase-1.1.5
export HIVE_HOME=/opt/apache-hive-2.0.0-bin
Copy the MySQL JDBC driver into the $SQOOP_HOME/lib directory:
sudo cp /home/shiyanlou/mysql-connector-java-5.1.35.jar /opt/sqoop-1.4.6.bin__hadoop-2.0.4-alpha/lib/mysql-connector-java-5.1.35.jar
Connect to the MySQL database:
sqoop list-databases --connect jdbc:mysql://localhost/ --username root
In a real environment that requires a password, add the -P option after --username root and type the password when prompted.
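For example, the same command with an interactive password prompt (a minimal sketch; -P makes Sqoop ask for the password instead of taking it on the command line):
$ sqoop list-databases --connect jdbc:mysql://localhost/ --username root -P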
Import data into HDFS. Providing the database password via a password file (the recommended, more secure method): save the password in a file in the user's home directory with permissions set to 400 and pass the file's path with --password-file. Sqoop reads the password from the file and hands it to the MapReduce cluster. The password file can be stored on the local FS or on HDFS.
$ sqoop import --connect jdbc:mysql://database.example.com/employees \
  --username venkatesh --password-file ${user.home}/.password
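The password file itself could be created like this (a minimal sketch; "mysecret" is a placeholder value, and echo -n avoids a trailing newline that would otherwise become part of the password):
$ echo -n "mysecret" > $HOME/.password   # placeholder password
$ chmod 400 $HOME/.password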
Providing the database password on the command line (insecure method):
sqoop import --connect jdbc:mysql://database.example.com/employees \
  --username aaron --password 12345
Upload the libraries Sqoop needs to HDFS:
$ hdfs dfs -mkdir hdfs://localhost:9000/opt/
$ hdfs dfs -mkdir hdfs://localhost:9000/opt/sqoop-1.4.6.bin__hadoop-2.0.4-alpha/
$ hdfs dfs -mkdir hdfs://localhost:9000/opt/sqoop-1.4.6.bin__hadoop-2.0.4-alpha/lib
$ hdfs dfs -copyFromLocal /opt/sqoop-1.4.6.bin__hadoop-2.0.4-alpha/ hdfs://localhost:9000/opt/sqoop-1.4.6.bin__hadoop-2.0.4-alpha/
$ hdfs dfs -copyFromLocal /opt/sqoop-1.4.6.bin__hadoop-2.0.4-alpha/lib hdfs://localhost:9000/opt/sqoop-1.4.6.bin__hadoop-2.0.4-alpha/lib
$ hdfs dfs -copyFromLocal /opt/sqoop-1.4.6.bin__hadoop-2.0.4-alpha/sqoop-1.4.6.jar hdfs://localhost:9000/opt/sqoop-1.4.6.bin__hadoop-2.0.4-alpha/sqoop-1.4.6.jar
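To confirm the upload, the lib directory on HDFS can be listed (a quick check, not part of the original steps):
$ hdfs dfs -ls hdfs://localhost:9000/opt/sqoop-1.4.6.bin__hadoop-2.0.4-alpha/lib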
Import a table:
$ sqoop import --connect jdbc:mysql://localhost/Corp --username root --table employee
Import only selected columns:
sqoop import --connect jdbc:mysql://localhost/Corp --username root --table employee_address \
  --columns "_id,number,city"
Control the degree of parallelism (8 parallel map tasks).
Before re-importing the employee table into HDFS, delete the existing output; otherwise the import fails because the target directory already exists:
hdfs dfs -rm -r hdfs://localhost:9000/user/hadoop/employee
$ sqoop import --connect jdbc:mysql://localhost/Corp --username root --table employee \
  -m 8
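With 8 map tasks the output directory should contain 8 part files (a sketch; part-m-0000N is the default naming for map-only output):
$ hdfs dfs -ls employee
# expect part-m-00000 through part-m-00007 plus a _SUCCESS marker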
Store the data in SequenceFile format:
sqoop import --connect jdbc:mysql://localhost/Corp --username root --table employee \
  --class-name com.shiyanlou.employee --as-sequencefile
Text-mode import with explicit delimiters (delete the previous output first):
hdfs dfs -rm -r hdfs://localhost:9000/user/hadoop/employee
$ sqoop import --connect jdbc:mysql://localhost/Corp --username root --table EMPLOYEES \
  --fields-terminated-by '\t' --lines-terminated-by '\n' \
  --optionally-enclosed-by '\"'
Import data into Hive:
sqoop import --connect jdbc:mysql://localhost/Corp --table EMPLOYEES \
  --hive-import
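The result can be checked from Hive (a sketch; assumes the table lands in the default database under the source table's name):
$ hive -e "SHOW TABLES; SELECT * FROM EMPLOYEES LIMIT 10;"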
Filter with a condition (import only employees who joined after 2010-01-01):
sqoop import --connect jdbc:mysql://localhost/Corp --table EMPLOYEES \
  --where "start_date > '2010-01-01'"
Choose the split column (use dept_id as the splitting column):
sqoop import --connect jdbc:mysql://localhost/Corp --table EMPLOYEES \
  --split-by dept_id
Append import (100,000 rows have already been imported):
sqoop import --connect jdbc:mysql://localhost/somedb --table sometable \
  --where "id > 100000" --target-dir /incremental_dataset --append
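The same effect can also be expressed with Sqoop's incremental mode (a sketch; --check-column names the key column and --last-value the highest value already imported):
sqoop import --connect jdbc:mysql://localhost/somedb --table sometable \
  --incremental append --check-column id --last-value 100000 \
  --target-dir /incremental_dataset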
Import all tables in the database (delete the existing employee_address output first):
hdfs dfs -rm -r hdfs://localhost:9000/user/hadoop/employee_address
$ sqoop import-all-tables --username root --connect jdbc:mysql://localhost/Corp
Verify that the import succeeded:
hdfs dfs -ls employee
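To inspect the imported records themselves (a sketch; part-m-00000 is the usual name of the first output file from a map-only import):
hdfs dfs -cat employee/part-m-00000 | head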