# Installing Hadoop in Docker
# 1. Preparation
- Hadoop tarball: hadoop-2.9.2.tar.gz is used here (it must match the `ADD` line in the Dockerfile below)
- A working Docker environment (a quick check follows)
- JDK tarball: jdk-8u202-linux-x64.tar.gz is used here
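Before starting, a quick sanity check (my addition, not part of the original walkthrough) that Docker itself works:

```bash
docker --version
docker run --rm hello-world   # pulls and runs a test container
```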
# 2. Set up the base container environment
## 1. Pull the CentOS image
```bash
docker pull centos:centos7.9
```
## 2. Build a CentOS image with the basic environment
Create a Dockerfile:
```bash
vim ./Dockerfile
```
with the following contents:
```dockerfile
# Base on the image pulled above
FROM centos:centos7.9
MAINTAINER mwf

# Point yum at the vault repos (the stock mirrors for EOL releases are gone)
RUN sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-*
RUN sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-*

# Install wget
RUN yum -y install wget

# Refresh the yum repo file via wget.
# Note: this repo file targets the CentOS 8 vault; with a centos7.9 base the
# sed fix above is usually enough, or substitute the matching CentOS 7 vault repo.
RUN wget -O /etc/yum.repos.d/CentOS-Base.repo https://mirrors.aliyun.com/repo/Centos-vault-8.5.2111.repo
RUN yum clean all
RUN yum makecache

# Install ssh
RUN yum install -y openssh openssh-server openssh-clients
RUN sed -i 's/UsePAM yes/UsePAM no/g' /etc/ssh/sshd_config

# Set the root password and allow root to sudo
RUN echo "root:726400sb" | chpasswd
RUN echo "root ALL=(ALL) ALL" >> /etc/sudoers

# Generate host keys (-N "" avoids an interactive passphrase prompt during build)
RUN ssh-keygen -t dsa -N "" -f /etc/ssh/ssh_host_dsa_key
RUN ssh-keygen -t rsa -N "" -f /etc/ssh/ssh_host_rsa_key
RUN mkdir /var/run/sshd

EXPOSE 22
CMD ["/usr/sbin/sshd", "-D"]
```
Build the image:
```bash
docker build -t centos7-ssh .
```
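As an optional sanity check (my addition), start a throwaway container from the new image and confirm you can SSH into it with the password set above, assuming host port 2222 is free:

```bash
docker run -d --name ssh-test -p 2222:22 centos7-ssh
ssh -p 2222 root@localhost   # password: 726400sb
docker rm -f ssh-test        # clean up afterwards
```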
## 3. Build an image with Hadoop and the JDK
Create another Dockerfile (both tarballs must sit in the same directory as it):
```bash
vim ./Dockerfile
```
with the following contents:
```dockerfile
FROM centos7-ssh
# Unpack the JDK and put it on the PATH
ADD jdk-8u202-linux-x64.tar.gz /usr/local/
ENV JAVA_HOME /usr/local/jdk1.8.0_202
ENV PATH $JAVA_HOME/bin:$PATH
# Unpack Hadoop and put it on the PATH
ADD hadoop-2.9.2.tar.gz /usr/local
ENV HADOOP_HOME /usr/local/hadoop-2.9.2
ENV PATH $HADOOP_HOME/bin:$PATH
# Hadoop's scripts need `which`
RUN yum install -y which
```
Build the image:
```bash
docker build -t hadoop .
```
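A quick way to verify both toolchains landed in the image (my addition; the `ENV` lines above put them on the PATH, so they are visible to `docker run`):

```bash
docker run --rm hadoop java -version
docker run --rm hadoop hadoop version
```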
## 4. Create a network and start the containers
Create a bridge network named hadoop-br:
```bash
docker network create --driver bridge hadoop-br
```
Start the containers on that network (the image built above is named `hadoop`):
```bash
docker run -itd --network hadoop-br --name master -p 50070:50070 -p 8088:8088 -p 9000:9000 hadoop
docker run -itd --network hadoop-br --name slave1 hadoop
docker run -itd --network hadoop-br --name slave2 hadoop
```
Check the network:
```bash
docker network inspect hadoop-br
```
From the output:

| IP | Hostname |
| --- | --- |
| 172.18.0.2 | master |
| 172.18.0.3 | slave1 |
| 172.18.0.4 | slave2 |
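You can also confirm name resolution directly (my addition; containers on a user-defined bridge resolve each other's names through Docker's embedded DNS):

```bash
docker exec master getent hosts slave1 slave2
# prints each name alongside its 172.18.0.x address
```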
## 5. Hostname mapping and passwordless SSH
1. Change the hostname (on every container; not strictly necessary):
```bash
vi /etc/hostname
```
2. Add hostname mappings (also not strictly necessary, since the containers already resolve each other by name; an example follows):
```bash
vi /etc/hosts
```
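If you do add mappings, the entries simply mirror the table above:

```
172.18.0.2 master
172.18.0.3 slave1
172.18.0.4 slave2
```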
3. Set up passwordless SSH:
```bash
# Generate a key pair (run on each container; accept the defaults)
ssh-keygen

# Distribute the public key to every node
ssh-copy-id -i /root/.ssh/id_rsa -p 22 root@master
ssh-copy-id -i /root/.ssh/id_rsa -p 22 root@slave1
ssh-copy-id -i /root/.ssh/id_rsa -p 22 root@slave2
```
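A quick check that passwordless login works (should print the hostnames without prompting for a password):

```bash
ssh root@slave1 hostname
ssh root@slave2 hostname
```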
## 6. Configure Hadoop
1. Add Hadoop's sbin directory to the PATH:
```bash
vi ~/.bashrc
# append this line:
export PATH=$PATH:$HADOOP_HOME/sbin
# then reload:
source ~/.bashrc
```
2. Create the directories the configuration below expects:
```bash
mkdir -p /home/hadoop/tmp /home/hadoop/hdfs_name /home/hadoop/hdfs_data
```
3. Edit the configuration files under $HADOOP_HOME/etc/hadoop. The `<property>` blocks below go inside each file's `<configuration>` element.

core-site.xml:
```xml
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://master:9000</value>
</property>
<property>
    <name>hadoop.tmp.dir</name>
    <value>file:/home/hadoop/tmp</value>
</property>
<property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
</property>
```
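For reference, each of these files has this overall shape (a minimal sketch; the header lines may differ slightly in your distribution):

```xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <!-- the <property> blocks from this section go here -->
</configuration>
```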
hdfs-site.xml:
```xml
<property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/home/hadoop/hdfs_name</value>
</property>
<property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/home/hadoop/hdfs_data</value>
</property>
<property>
    <name>dfs.replication</name>
    <value>2</value>
</property>
<property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>master:9001</value>
</property>
<property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
</property>
```
mapred-site.xml (create it from the template first):
```bash
cp mapred-site.xml.template mapred-site.xml
```
```xml
<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
<property>
    <name>mapreduce.jobhistory.address</name>
    <value>master:10020</value>
</property>
<property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>master:19888</value>
</property>
```
yarn-site.xml:
```xml
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
<property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
    <name>yarn.resourcemanager.address</name>
    <value>master:8032</value>
</property>
<property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>master:8030</value>
</property>
<property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>master:8031</value>
</property>
<property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>master:8033</value>
</property>
<property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>master:8088</value>
</property>
```
slaves (one worker hostname per line):
```
slave1
slave2
```
4. Copy the configuration to the slaves (run from $HADOOP_HOME/etc/hadoop on master; the destination path matches HADOOP_HOME):
```bash
scp -r ./* slave1:/usr/local/hadoop-2.9.2/etc/hadoop/
scp -r ./* slave2:/usr/local/hadoop-2.9.2/etc/hadoop/
scp -r /home/hadoop slave1:/home/
scp -r /home/hadoop slave2:/home/
```
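Optionally confirm the files arrived:

```bash
ssh root@slave1 ls /usr/local/hadoop-2.9.2/etc/hadoop/
```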
5. At this point, snapshot the configured containers as images:
```bash
docker commit <container-id-or-name> <image-name>
```
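Concretely, to match the restart commands at the end of this post (which use image names master, slave1, and slave2):

```bash
docker commit master master
docker commit slave1 slave1
docker commit slave2 slave2
```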
## 7. Start Hadoop
1. Format HDFS (on master, first run only):
```bash
hdfs namenode -format
```
2. Start HDFS:
```bash
start-dfs.sh
```
3. Start YARN:
```bash
start-yarn.sh
```
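To confirm the daemons came up, `jps` on each node should show roughly the following for this configuration (a sketch of the expected output; PIDs will differ):

```bash
docker exec master jps    # expect NameNode, SecondaryNameNode, ResourceManager
docker exec slave1 jps    # expect DataNode, NodeManager
docker exec slave2 jps    # expect DataNode, NodeManager
```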
If those scripts finish without errors, the cluster is up. You can then check the web UIs through the ports mapped earlier: the NameNode UI at http://localhost:50070 and the ResourceManager UI at http://localhost:8088.
Done!
# 3. Problems encountered
1. Decide which ports a container needs to publish before starting it; published ports cannot be added to a running container.
2. After recreating containers from the committed images, the hostname inside each container changes.
3. The committed image already had its NameNode formatted, but recreating a container from it left the NameNode unusable; my tentative conclusion is that the hostname change is the cause (a possible workaround follows the commands below).
Startup commands:
```bash
docker run -itd --network hadoop-br --name master -p 50070:50070 -p 8088:8088 -p 9000:9000 master
docker run -itd --network hadoop-br --name slave1 slave1
docker run -itd --network hadoop-br --name slave2 slave2
```
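A possible fix for the hostname problem (my suggestion, not from the original): pin each container's hostname with Docker's `--hostname` flag so it matches the name the NameNode was formatted under:

```bash
docker run -itd --network hadoop-br --hostname master --name master \
    -p 50070:50070 -p 8088:8088 -p 9000:9000 master
docker run -itd --network hadoop-br --hostname slave1 --name slave1 slave1
docker run -itd --network hadoop-br --hostname slave2 --name slave2 slave2
```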