Hadoop Pseudo-Distributed Deployment
In the previous post we finished compiling Hadoop 2.8.1 from source, which produced a hadoop-2.8.1.tar.gz package. The next step is to deploy Hadoop.
In a pseudo-distributed deployment, the HDFS NameNode, DataNode, and Secondary NameNode, together with YARN's ResourceManager and NodeManager, all run on the same host, so only a single machine is required.
1. Unpack Hadoop
Copy the compiled Hadoop tarball to /opt/software and unpack it:
[root@hadoop01 software]# cp /opt/software/hadoop-2.8.1-src/hadoop-dist/target/hadoop-2.8.1.tar.gz /opt/software/
[root@hadoop01 software]# tar -zxvf hadoop-2.8.1.tar.gz
Configure the environment variables:
[root@hadoop01 hadoop-2.8.1]# vi /etc/profile
Add:
export HADOOP_HOME=/opt/software/hadoop-2.8.1
export PATH=$HADOOP_HOME/bin:$PROTOC_HOME/bin:$FINDBUGS_HOME/bin:$MVN_HOME/bin:$JAVA_HOME/bin:$PATH
Reload the profile to apply the changes, then verify:
[root@hadoop01 hadoop-2.8.1]# source /etc/profile
[root@hadoop01 hadoop-2.8.1]# hadoop version
Hadoop 2.8.1
Subversion Unknown -r Unknown
Compiled by root on 2018-05-19T08:04Z
Compiled with protoc 2.5.0
From source with checksum 60125541c2b3e266cbf3becc5bda666
This command was run using /opt/software/hadoop-2.8.1/share/hadoop/common/hadoop-common-2.8.1.jar
[root@hadoop01 hadoop-2.8.1]#
2. Edit the Hadoop configuration files
Official documentation: http://hadoop.apache.org/docs/r2.8.4/hadoop-project-dist/hadoop-common/SingleCluster.html
2.1 Configure core-site.xml
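The file contents are not reproduced at this point in the original; a minimal version following the linked single-node guide would be (the address is switched to the hostname later, in section 3):
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
</configuration>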
2.2 Configure hdfs-site.xml
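Likewise not reproduced; the guide's pseudo-distributed setup keeps a single block replica:
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
</configuration>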
2.3 Set up passwordless SSH to localhost for the root account
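The commands are not shown in the original; the usual sequence from the guide is:
[root@hadoop01 ~]# ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
[root@hadoop01 ~]# cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[root@hadoop01 ~]# chmod 0600 ~/.ssh/authorized_keys
[root@hadoop01 ~]# ssh localhost date   # should no longer prompt for a password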
2.4 Format the HDFS filesystem
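The standard invocation (not reproduced in the original):
[root@hadoop01 hadoop-2.8.1]# bin/hdfs namenode -format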
2.5 Configure JAVA_HOME
[root@hadoop01 hadoop-2.8.1]# cd
[root@hadoop01 ~]# cd /opt/software/hadoop-2.8.1
[root@hadoop01 hadoop-2.8.1]# vi etc/hadoop/hadoop-env.sh
Set:
export JAVA_HOME=/usr/java/jdk1.8.0_45
3. Start HDFS
[root@hadoop01 hadoop-2.8.1]# sbin/start-dfs.sh
Starting namenodes on [localhost]
localhost: starting namenode, logging to /opt/software/hadoop-2.8.1/logs/hadoop-root-namenode-hadoop01.out
localhost: starting datanode, logging to /opt/software/hadoop-2.8.1/logs/hadoop-root-datanode-hadoop01.out
Starting secondary namenodes [0.0.0.0]
The authenticity of host '0.0.0.0 (0.0.0.0)' can't be established.
ECDSA key fingerprint is c4:8b:d9:92:fe:e2:85:dd:1e:06:dd:d7:e5:9e:a5:c4.
Are you sure you want to continue connecting (yes/no)? yes # the first start asks for this confirmation
0.0.0.0: Warning: Permanently added '0.0.0.0' (ECDSA) to the list of known hosts.
0.0.0.0: starting secondarynamenode, logging to /opt/software/hadoop-2.8.1/logs/hadoop-root-secondarynamenode-hadoop01.out
[root@hadoop01 hadoop-2.8.1]# jps
10881 Jps
10434 NameNode
10746 SecondaryNameNode
10573 DataNode
[root@hadoop01 hadoop-2.8.1]#
The startup log shows that the NameNode and DataNode are started on localhost while the Secondary NameNode is started on 0.0.0.0. For the cluster to serve external clients, the daemons must be started on the host's IP address or hostname, so a few settings need to change.
Edit core-site.xml (NameNode address):
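The changed property is not reproduced in the original; presumably fs.defaultFS now points at the hostname (hadoop01, as seen in the shell prompts), e.g.:
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoop01:9000</value>
</property>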
Edit slaves (DataNode list):
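Not reproduced either; etc/hadoop/slaves would list the hostname in place of localhost:
hadoop01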
Restart HDFS:
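A restart is simply the stop and start scripts back to back:
[root@hadoop01 hadoop-2.8.1]# sbin/stop-dfs.sh
[root@hadoop01 hadoop-2.8.1]# sbin/start-dfs.sh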
Look up the NameNode process id and check its ports: the listening address is now the machine's own IP.
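One way to check (the original's exact commands are not preserved; <PID> stands for the NameNode process id reported by jps):
[root@hadoop01 hadoop-2.8.1]# jps | grep NameNode
[root@hadoop01 hadoop-2.8.1]# netstat -nltp | grep <PID>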
Once started, open http://192.168.1.8:50070/ in a browser and the normal HDFS page appears. If the browser cannot reach the HDFS web page, check whether the host firewall allows port 50070 through.
At this point the Hadoop deployment is complete. Everything above was done as the superuser root in order to avoid unnecessary permission problems. In a real production environment, HDFS should be run as a hadoop/hdfs user.
4. Redeploy HDFS as the hadoop user
Continuing from the steps above, stop the HDFS processes that root started, then switch to the hadoop user.
Stop the HDFS processes (as root):
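That is, from the Hadoop directory (command not reproduced in the original):
[root@hadoop01 hadoop-2.8.1]# sbin/stop-dfs.sh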
Stop any remaining Hadoop processes and delete the log files produced while HDFS ran as root (as root):
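A sketch of the cleanup (the exact commands are not preserved in the original):
[root@hadoop01 hadoop-2.8.1]# jps          # verify no Hadoop processes remain; kill any leftovers
[root@hadoop01 hadoop-2.8.1]# rm -f logs/*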
Create the hadoop group and user:
[root@hadoop01 ~]# groupadd hadoop
[root@hadoop01 ~]# useradd -d /home/hadoop -g hadoop hadoop
[root@hadoop01 ~]# passwd hadoop
Changing password for user hadoop.
New password:
BAD PASSWORD: The password is shorter than 8 characters
Retype new password:
passwd: all authentication tokens updated successfully.
[root@hadoop01 ~]# id hadoop
uid=1000(hadoop) gid=1000(hadoop) groups=1000(hadoop)
[root@hadoop01 ~]#
Change the owner of the Hadoop installation directory to hadoop:
[root@hadoop01 ~]# cd /opt/software/
[root@hadoop01 software]# chown -R hadoop:hadoop hadoop-2.8.1
[root@hadoop01 software]# ls -ltrd hadoop-2.8.1
drwxr-xr-x. 10 hadoop hadoop 161 May 19 06:50 hadoop-2.8.1
[root@hadoop01 software]#
Switch to the hadoop user and set up passwordless SSH for it (the NameNode's password-free trust relationship with itself):
[root@hadoop01 software]# su - hadoop
[hadoop@hadoop01 ~]$ ssh-keygen
[hadoop@hadoop01 ~]$ cd .ssh/
[hadoop@hadoop01 .ssh]$ cat id_rsa.pub >> authorized_keys
[hadoop@hadoop01 .ssh]$ ll
total 16
-rw-rw-r--. 1 hadoop hadoop 397 May 19 12:58 authorized_keys
-rw-------. 1 hadoop hadoop 1675 May 19 12:57 id_rsa
-rw-r--r--. 1 hadoop hadoop 397 May 19 12:57 id_rsa.pub
-rw-r--r--. 1 hadoop hadoop 183 May 19 12:58 known_hosts
[hadoop@hadoop01 .ssh]$ chmod 600 authorized_keys
[hadoop@hadoop01 .ssh]$ ssh 192.168.1.8 date
Sat May 19 13:00:47 EDT 2018
[hadoop@hadoop01 .ssh]$ ssh hadoop01 date
Sat May 19 13:00:54 EDT 2018
[hadoop@hadoop01 .ssh]$
Earlier we only configured the NN and DN to start from the hostname; the SNN still uses 0.0.0.0. Configure the SNN to start from the hostname as well:
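The snippet is not shown in the original; the relevant hdfs-site.xml property is dfs.namenode.secondary.http-address (default 0.0.0.0:50090), e.g.:
<property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>hadoop01:50090</value>
</property>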
Format HDFS:
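This is the same invocation as in step 2.4, now run as the hadoop user:
[hadoop@hadoop01 hadoop-2.8.1]$ bin/hdfs namenode -format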
Start HDFS:
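Started the same way as before (output not reproduced in the original):
[hadoop@hadoop01 hadoop-2.8.1]$ sbin/start-dfs.sh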
As before, http://192.168.1.8:50070/ is reachable from a browser.
This completes the redeployment of HDFS under the hadoop user.
5. YARN pseudo-distributed deployment (still as the hadoop user)
Configure mapred-site.xml:
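The contents are not reproduced; per the single-node guide, the file is created from its template and MapReduce is pointed at YARN:
[hadoop@hadoop01 hadoop-2.8.1]$ cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>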
Configure yarn-site.xml:
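Per the same guide:
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>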
Start YARN:
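The YARN daemons have their own start script (output not reproduced here):
[hadoop@hadoop01 hadoop-2.8.1]$ sbin/start-yarn.sh
[hadoop@hadoop01 hadoop-2.8.1]$ jps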
Two new Java processes appear: ResourceManager and NodeManager. The YARN web UI is now available at http://192.168.1.8:8088
6. MapReduce2
MapReduce itself does not run any standing Java processes; task processes appear only when a job jar is submitted to the cluster. Launch a MapReduce job:
[hadoop@hadoop01 hadoop-2.8.1]$ hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-2.8.1.jar pi 5 20
Number of Maps = 5
Samples per Map = 20
Wrote input for Map #0
Wrote input for Map #1
Wrote input for Map #2
Wrote input for Map #3
Wrote input for Map #4
Starting Job
18/05/20 02:03:39 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
18/05/20 02:03:39 INFO input.FileInputFormat: Total input files to process : 5
18/05/20 02:03:40 INFO mapreduce.JobSubmitter: number of splits:5
18/05/20 02:03:40 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1526752485560_0001
18/05/20 02:03:41 INFO impl.YarnClientImpl: Submitted application application_1526752485560_0001
18/05/20 02:03:41 INFO mapreduce.Job: The url to track the job: http://hadoop01:8088/proxy/application_1526752485560_0001/
18/05/20 02:03:41 INFO mapreduce.Job: Running job: job_1526752485560_0001
18/05/20 02:03:49 INFO mapreduce.Job: Job job_1526752485560_0001 running in uber mode : false
18/05/20 02:03:49 INFO mapreduce.Job: map 0% reduce 0%
18/05/20 02:04:01 INFO mapreduce.Job: map 100% reduce 0%
18/05/20 02:04:07 INFO mapreduce.Job: map 100% reduce 100%
18/05/20 02:04:08 INFO mapreduce.Job: Job job_1526752485560_0001 completed successfully
18/05/20 02:04:09 INFO mapreduce.Job: Counters: 49
    File System Counters
        FILE: Number of bytes read=116
        FILE: Number of bytes written=819855
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
        HDFS: Number of bytes read=1335
        HDFS: Number of bytes written=215
        HDFS: Number of read operations=23
        HDFS: Number of large read operations=0
        HDFS: Number of write operations=3
    Job Counters
        Launched map tasks=5
        Launched reduce tasks=1
        Data-local map tasks=5
        Total time spent by all maps in occupied slots (ms)=49172
        Total time spent by all reduces in occupied slots (ms)=4227
        Total time spent by all map tasks (ms)=49172
        Total time spent by all reduce tasks (ms)=4227
        Total vcore-milliseconds taken by all map tasks=49172
        Total vcore-milliseconds taken by all reduce tasks=4227
        Total megabyte-milliseconds taken by all map tasks=50352128
        Total megabyte-milliseconds taken by all reduce tasks=4328448
    Map-Reduce Framework
        Map input records=5
        Map output records=10
        Map output bytes=90
        Map output materialized bytes=140
        Input split bytes=745
        Combine input records=0
        Combine output records=0
        Reduce input groups=2
        Reduce shuffle bytes=140
        Reduce input records=10
        Reduce output records=0
        Spilled Records=20
        Shuffled Maps =5
        Failed Shuffles=0
        Merged Map outputs=5
        GC time elapsed (ms)=2079
        CPU time spent (ms)=5510
        Physical memory (bytes) snapshot=1491693568
        Virtual memory (bytes) snapshot=12667506688
        Total committed heap usage (bytes)=1075314688
    Shuffle Errors
        BAD_ID=0
        CONNECTION=0
        IO_ERROR=0
        WRONG_LENGTH=0
        WRONG_MAP=0
        WRONG_REDUCE=0
    File Input Format Counters
        Bytes Read=590
    File Output Format Counters
        Bytes Written=97
Job Finished in 29.719 seconds
Estimated value of Pi is 3.20000000000000000000
[hadoop@hadoop01 hadoop-2.8.1]$
In the YARN web UI you can see the corresponding MapReduce job finished successfully: MapReduce runs on YARN.
With that, the whole Hadoop pseudo-distributed deployment is complete.
Notes