Hadoop Cluster Deployment
# 1. Introduction
This tutorial deploys a non-high-availability Hadoop MapReduce compute cluster that uses Tencent Cloud Object Storage (COS) in place of HDFS. The cluster consists of one ResourceManager, three NodeManagers, and one JobHistory Server.
# 2. Software Downloads
- JDK download:
Version: jdk-8u201-linux-x64.tar.gz
URL: https://www.oracle.com/technetwork/java/javase/downloads
- Hadoop download:
Version: hadoop-3.3.1.tar.gz
URL: https://dlcdn.apache.org/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz
# 3. Deployment Environment
- Servers:
Master: hadoop-server1 (16 cores, 64 GB RAM, 1 TB data disk)
Worker: hadoop-server2 (16 cores, 64 GB RAM, 1 TB data disk)
Worker: hadoop-server3 (16 cores, 64 GB RAM, 1 TB data disk)
- Linux configuration:
Linux version: CentOS 7.8
Linux user: hadoop
Data disk mount point: /data
- Package locations:
JDK package: ~/jdk-8u201-linux-x64.tar.gz
Hadoop package: ~/hadoop-3.3.1.tar.gz
- Install directories:
JDK install directory: /data/jdk1.8.0_201
Hadoop install directory: /data/hadoop-3.3.1
# 4. JDK Installation and Configuration
# 4.1. Extract the JDK
$ cd /data
$ tar -zvxf ~/jdk-8u201-linux-x64.tar.gz
# 4.2. Configure JDK Environment Variables
$ vi ~/.bash_profile
#Java Environment
export JAVA_HOME=/data/jdk1.8.0_201
export PATH=$JAVA_HOME/bin:$PATH
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
$ source ~/.bash_profile
# 4.3. Verify the JDK
$ java -version
java version "1.8.0_201"
Java(TM) SE Runtime Environment (build 1.8.0_201-b09)
Java HotSpot(TM) 64-Bit Server VM (build 25.201-b09, mixed mode)
# 5. Passwordless SSH Between Cluster Servers
# 5.1. Generate a Key Pair on the Master Server
$ ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
$ chmod 0600 ~/.ssh/authorized_keys
# 5.2. Copy the Keys to the Other Servers
$ scp -r ~/.ssh hadoop-server2:~
$ scp -r ~/.ssh hadoop-server3:~
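Because all three servers now share the same key pair and authorized_keys file, every server can log in to every other one (and to itself) without a password.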
# 5.3. Verify Passwordless Login
$ ssh hadoop-server1
$ ssh hadoop-server2
$ ssh hadoop-server3
# 6. Hadoop MapReduce Cluster Installation
- All steps in this section are performed on the master server; step 6.8 then distributes the installation to the other servers.
# 6.1. Extract Hadoop
$ cd /data
$ tar -zvxf ~/hadoop-3.3.1.tar.gz
# 6.2. Configure Hadoop Environment Variables
$ vi ~/.bash_profile
#Hadoop Environment
export HADOOP_HOME=/data/hadoop-3.3.1
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
$ source ~/.bash_profile
# 6.3. Configure hadoop-env.sh
export JAVA_HOME=/data/jdk1.8.0_201
# Put every jar under share/hadoop/tools/lib (where the COS connector ships) on the Hadoop classpath
for f in $HADOOP_HOME/share/hadoop/tools/lib/*.jar; do
  if [ "$HADOOP_CLASSPATH" ]; then
    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
  else
    export HADOOP_CLASSPATH=$f
  fi
done
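A quick sanity check that the tools jars actually landed on the classpath (a sketch; it assumes the connector jar name contains "cos", as the hadoop-cos jar shipped with Hadoop 3.3.x does):
$ hadoop classpath | tr ':' '\n' | grep -i cos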
# 6.4. Configure core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>cosn://your-cos-bucket</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>${env.HADOOP_HOME}/tmp/hadoop-${user.name}</value>
  </property>
  <property>
    <name>fs.cosn.credentials.provider</name>
    <value>org.apache.hadoop.fs.cosn.auth.SimpleCredentialsProvider</value>
  </property>
  <property>
    <name>fs.cosn.userinfo.secretId</name>
    <value>your-cos-secretId</value>
  </property>
  <property>
    <name>fs.cosn.userinfo.secretKey</name>
    <value>your-cos-secretKey</value>
  </property>
  <property>
    <name>fs.cosn.bucket.region</name>
    <value>your-cos-region</value>
  </property>
  <property>
    <name>fs.cosn.impl</name>
    <value>org.apache.hadoop.fs.cosn.CosNFileSystem</value>
  </property>
  <property>
    <name>fs.AbstractFileSystem.cosn.impl</name>
    <value>org.apache.hadoop.fs.cosn.CosN</value>
  </property>
  <property>
    <name>fs.cosn.tmp.dir</name>
    <value>${env.HADOOP_HOME}/tmp/hadoop_cos</value>
  </property>
  <property>
    <name>fs.cosn.buffer.size</name>
    <value>134217728</value>
  </property>
  <property>
    <name>fs.cosn.block.size</name>
    <value>134217728</value>
  </property>
  <property>
    <name>fs.cosn.maxRetries</name>
    <value>3</value>
  </property>
  <property>
    <name>fs.cosn.retry.interval.seconds</name>
    <value>3</value>
  </property>
</configuration>
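With this file in place, COS connectivity can be smoke-tested from the shell using the standard filesystem commands; your-cos-bucket below is the same placeholder as in the config:
$ hadoop fs -mkdir cosn://your-cos-bucket/test
$ hadoop fs -ls cosn://your-cos-bucket/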
# 6.5. Configure mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=${env.HADOOP_HOME}</value>
  </property>
  <property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=${env.HADOOP_HOME}</value>
  </property>
  <property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=${env.HADOOP_HOME}</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>hadoop-server1:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hadoop-server1:19888</value>
  </property>
</configuration>
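The three HADOOP_MAPRED_HOME entries tell the MapReduce ApplicationMaster, map tasks, and reduce tasks where the Hadoop installation lives; on Hadoop 3, jobs commonly fail with "Could not find or load main class org.apache.hadoop.mapreduce.v2.app.MRAppMaster" when they are missing.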
# 6.6. Configure yarn-site.xml
<configuration>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hadoop-server1</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>53248</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>16</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>53248</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-vcores</name>
    <value>16</value>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.log.server.url</name>
    <value>http://hadoop-server1:19888/jobhistory/logs</value>
  </property>
  <property>
    <name>yarn.application.classpath</name>
    <value>
      ${env.HADOOP_HOME}/etc/hadoop,
      ${env.HADOOP_HOME}/share/hadoop/common/lib/*,
      ${env.HADOOP_HOME}/share/hadoop/common/*,
      ${env.HADOOP_HOME}/share/hadoop/hdfs,
      ${env.HADOOP_HOME}/share/hadoop/hdfs/lib/*,
      ${env.HADOOP_HOME}/share/hadoop/hdfs/*,
      ${env.HADOOP_HOME}/share/hadoop/mapreduce/*,
      ${env.HADOOP_HOME}/share/hadoop/yarn,
      ${env.HADOOP_HOME}/share/hadoop/yarn/lib/*,
      ${env.HADOOP_HOME}/share/hadoop/yarn/*,
      ${env.HADOOP_HOME}/share/hadoop/tools/lib/*
    </value>
  </property>
</configuration>
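53248 MB is 52 GB, so each 64 GB NodeManager keeps roughly 12 GB back for the operating system and the Hadoop daemons; the vcore count matches the 16 physical cores.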
# 6.7. Configure the workers File
hadoop-server1
hadoop-server2
hadoop-server3
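start-yarn.sh reads this file and starts a NodeManager on each listed host over SSH, which is why the passwordless login from section 5 is required.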
# 6.8. Copy to the Other Servers
$ cd /data
$ scp -r hadoop-3.3.1 hadoop-server2:/data
$ scp -r hadoop-3.3.1 hadoop-server3:/data
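The workers also need the JDK and the environment variables from sections 4 and 6.2. If those were set up only on the master, the same scp approach covers them (a sketch, assuming identical paths on all servers):
$ scp -r /data/jdk1.8.0_201 hadoop-server2:/data
$ scp -r /data/jdk1.8.0_201 hadoop-server3:/data
$ scp ~/.bash_profile hadoop-server2:~
$ scp ~/.bash_profile hadoop-server3:~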
# 7. Starting and Stopping the Cluster
# 7.1. Start YARN
$ start-yarn.sh
ResourceManager web UI: http://hadoop-server1:8088
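To confirm that all three NodeManagers registered, list them from the command line (yarn node is part of the standard YARN CLI):
$ yarn node -list -all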
# 7.2. Start the MapReduce JobHistory Server
$ mapred --daemon start historyserver
MapReduce JobHistory Server web UI: http://hadoop-server1:19888
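With YARN and the JobHistory Server running, the examples jar bundled with the distribution makes a convenient end-to-end smoke test (the jar path below matches the Hadoop 3.3.1 layout):
$ yarn jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar pi 16 1000
The finished job should then appear in the JobHistory Server UI.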
# 7.3. Stop YARN
$ stop-yarn.sh
# 7.4. Stop the MapReduce JobHistory Server
$ mapred --daemon stop historyserver