Using Docker to build a Hadoop cluster

Time:2020-10-16

Create container

#Create a custom network
docker network create --subnet=172.18.0.0/16 mynetwork

#Start container
# cluster-master
docker run -itd --privileged  --name cluster-master -h cluster-master -p 18088:18088 -p 9870:9870 --net mynetwork --ip 172.18.0.2 centos:latest /usr/sbin/init
# cluster-slaves
docker run -itd --privileged --name cluster-slave1 -h cluster-slave1 --net mynetwork --ip 172.18.0.3 centos:latest /usr/sbin/init
docker run -itd --privileged --name cluster-slave2 -h cluster-slave2 --net mynetwork --ip 172.18.0.4 centos:latest /usr/sbin/init
docker run -itd --privileged --name cluster-slave3 -h cluster-slave3 --net mynetwork --ip 172.18.0.5 centos:latest /usr/sbin/init
# docker run -itd --privileged -v /System/Volumes/Data/data/personal/docker/hadoop/sys/fs/cgroup:/sys/fs/cgroup --name cluster-slave4 -h cluster-slave4 --net mynetwork --ip 172.18.0.6 centos:latest /usr/sbin/init

Install OpenSSH and set up passwordless login

Install OpenSSH on the master

#Enter the master container
docker exec -it cluster-master /bin/bash
#Execute inside container
yum -y install openssh openssh-server openssh-clients vim

#Uncomment the following lines in the file: Port 22, PermitRootLogin yes, PasswordAuthentication yes
vim /etc/ssh/sshd_config

#Start openssh
systemctl start sshd

#Generating SSH key
ssh-keygen -t rsa

cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys

Install OpenSSH on the slaves (slave1, slave2, and slave3 are set up the same way)

#Enter the slave container
docker exec -it cluster-slave1 /bin/bash
#Execute inside container
yum -y install openssh openssh-server openssh-clients vim

#Uncomment the following lines in the file: Port 22, PermitRootLogin yes, PasswordAuthentication yes
vim /etc/ssh/sshd_config

#Start openssh
systemctl start sshd

#Replace ~/.ssh/* on slave1 with ~/.ssh/* from the master

systemctl start sshd

Ansible installation

yum -y install epel-release
yum -y install ansible

#Edit the Ansible inventory file
vi /etc/ansible/hosts
`
[cluster]
cluster-master
cluster-slave1
cluster-slave2
cluster-slave3

[master]
cluster-master

[slaves]
cluster-slave1
cluster-slave2
cluster-slave3
`

Modify hosts

:>/etc/hosts
cat >>/etc/hosts<<EOF
127.0.0.1   localhost
172.18.0.2  cluster-master
172.18.0.3  cluster-slave1
172.18.0.4  cluster-slave2
172.18.0.5  cluster-slave3
EOF

source ~/.bashrc

#Distribute to slave
ansible cluster -m copy -a "src=~/.bashrc dest=~/"

Install JDK and Hadoop (all four machines)

#Move the host JDK and Hadoop files to the docker container
docker cp ~/Download/hadoop-3.3.0.tar.gz <container ID or container name>:/opt
docker cp ~/Download/jdk-8u261-linux-x64.tar.gz <container ID or container name>:/opt

#Move file to slave
scp ./* root@cluster-slave1:/opt/
scp ./* root@cluster-slave2:/opt/
scp ./* root@cluster-slave3:/opt/

#Decompress
tar -zxvf hadoop-3.3.0.tar.gz
tar -zxvf jdk-8u261-linux-x64.tar.gz

#Configure environment variables 
vim ~/.bashrc
`
# hadoop
export HADOOP_HOME=/opt/hadoop-3.3.0
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH

#java
export JAVA_HOME=/opt/jdk1.8.0_261
export PATH=$JAVA_HOME/bin:$PATH
`
source ~/.bashrc

Configure the configuration files required for Hadoop to run