1. Install CentOS
docker pull centos
[root@localhost ~]# docker search centos
NAME DESCRIPTION STARS OFFICIAL AUTOMATED
centos The official build of CentOS. 6973
If the pull is slow, add registry mirror addresses in /etc/docker/daemon.json:
{
  "registry-mirrors": [
    "https://ffte7po1.mirror.aliyuncs.com",
    "http://hub-mirror.c.163.com",
    "https://registry.docker-cn.com"
  ]
}
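After editing daemon.json, reload the Docker daemon so the mirrors take effect; a minimal check:
systemctl daemon-reload
systemctl restart docker
docker info | grep -A 3 "Registry Mirrors"   # confirm the mirrors are listed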
2. Run the CentOS container (don't set the hostname yet):
docker run --privileged=true -itd --name=bigwork -p 50070:50070 \
-v /usr/local/docker/bigdatas/softs:/root/bigdatas/softs \
centos /sbin/init
--privileged=true and /sbin/init are used because otherwise systemctl cannot be used inside the container.
3. Enter the container:
docker exec -it bigwork /bin/bash
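A quick way to confirm that systemd really is running as PID 1 inside the container (otherwise the systemctl commands used later will fail):
cat /proc/1/comm                                 # should print "systemd"
systemctl list-units --type=service --no-pager   # should list units instead of erroring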
4. Configure Java in ~/.bashrc:
mkdir -p /usr/local/java && tar -xvf xxxxx.tar.gz -C /usr/local/java
cd /usr/local/java/xxxxx
pwd
vi ~/.bashrc
export JAVA_HOME=/usr/local/java/jdk1.8.0_141
export JRE_HOME=$JAVA_HOME/jre
export PATH=$JAVA_HOME/bin:$PATH
source ~/.bashrc
>java -version
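A quick sanity check after sourcing ~/.bashrc (the expected outputs assume the jdk1.8.0_141 path used above):
echo $JAVA_HOME    # /usr/local/java/jdk1.8.0_141
which java         # /usr/local/java/jdk1.8.0_141/bin/java
java -version      # java version "1.8.0_141"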
5. Install the SSH server and client:
a. First check whether SSH is already installed:
[root@localhost docker]# rpm -qa | grep ssh
openssh-server-7.4p1-21.el7.x86_64
openssh-clients-7.4p1-21.el7.x86_64
libssh2-1.8.0-4.el7.x86_64
openssh-7.4p1-21.el7.x86_64
If nothing is listed, run:
yum install -y openssh
// or
yum install -y openssh*
Configure sshd_config:
>vi /etc/ssh/sshd_config
Enable the following options (set their values to yes; see the snippet below):
PermitRootLogin
RSAAuthentication
PubkeyAuthentication
—
Check the SSH version with rpm -qa | grep ssh; on OpenSSH 7.3 and later the RSAAuthentication option no longer exists.
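After editing, the relevant lines in /etc/ssh/sshd_config should look roughly like this (RSAAuthentication omitted here, assuming OpenSSH 7.4 as in the rpm output above):
PermitRootLogin yes
PubkeyAuthentication yes
# quick check:
grep -E '^(PermitRootLogin|PubkeyAuthentication)' /etc/ssh/sshd_config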
Start and enable sshd:
systemctl start sshd
systemctl enable sshd
--
/usr/sbin/sshd -t // test the sshd config
systemctl status sshd
ps -ef | grep sshd
Reference: https://www.pianshen.com/article/4448212020/
On OpenSSH 7.3+ the following two items do not need to be changed:
change UsePAM yes to UsePAM no
change UsePrivilegeSeparation sandbox to UsePrivilegeSeparation no
Verify the login:
ssh localhost
If ssh reports "command not found", install openssh-clients:
yum -y install openssh-clients
Then run >ssh localhost again.
This step asks for a password, so set one first:
passwd root, then enter the new password twice;
If the passwd command is not found:
yum install passwd
6. Install Hadoop:
tar -xvf xxxx.tar.gz -C /usr/local/
cd /usr/local/
mv hadoop-2.7.xxxx hadoop
cd hadoop/
cd bin
./hadoop version
7. Configure the Hadoop files under /usr/local/hadoop/etc/hadoop/:
a. core-site.xml
<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/usr/local/hadoop/hadoop.tmp</value>
    <description>Abase for other temporary directories.</description>
  </property>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://master:9000</value>
  </property>
</configuration>
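Note: fs.default.name is the deprecated name of this setting; Hadoop 2.x still accepts it but logs a deprecation warning. The modern equivalent would be:
<property>
  <name>fs.defaultFS</name>
  <value>hdfs://master:9000</value>
</property>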
b. hdfs-site.xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <!--
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/data/hadoop_data/hdfs/namenode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/data/hadoop_data/hdfs/datanode</value>
  </property>
  -->
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>master:50090</value>
  </property>
</configuration>
c. mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>master:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>master:19888</value>
  </property>
</configuration>
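The Hadoop 2.7 tarball usually ships this file only as a template, so if mapred-site.xml is missing, copy it first (assuming the install path above):
cd /usr/local/hadoop/etc/hadoop
cp mapred-site.xml.template mapred-site.xml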
d. yarn-site.xml
<configuration>
  <!-- Site specific YARN configuration properties -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>master</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>master:8050</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>master:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>master:8025</value>
  </property>
  <property>
    <name>yarn.nodemanager.pmem-check-enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
</configuration>
e. slaves
Enter the two worker nodes in this file, one per line:
slave1
slave2
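One more file usually needs a tweak: daemons launched over SSH by start-all.sh do not source ~/.bashrc, so it is common to hard-code JAVA_HOME in hadoop-env.sh (a sketch, assuming the jdk1.8.0_141 path above):
vi /usr/local/hadoop/etc/hadoop/hadoop-env.sh
# change the stock line
export JAVA_HOME=${JAVA_HOME}
# to
export JAVA_HOME=/usr/local/java/jdk1.8.0_141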
8. Environment variables (append to ~/.bashrc):
#hadoop
export HADOOP_HOME=/usr/local/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
export JAVA_LIBRARY_PATH=$HADOOP_COMMON_LIB_NATIVE_DIR
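After appending the block above, re-source ~/.bashrc and confirm the Hadoop binaries are on PATH:
source ~/.bashrc
hadoop version    # should print the Hadoop 2.7.x version without needing the full path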
9. Configure the IP/hostname mappings:
vi /etc/hosts
172.17.0.2 master
172.17.0.3 slave1
172.17.0.4 slave2
========================================================
Exit the container:
> exit
Look up the container ID:
docker ps -a
Commit the current container as an image:
docker commit 容器id bigwork/datas:v1
Now docker images shows an extra bigwork/datas:v1 image.
Push the image to the public registry (requires docker login; note that you can only push to a namespace your logged-in account owns):
docker tag bigwork/datas:v1 glootz/bigwork:datas-v1
docker push glootz/bigwork:datas-v1
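The push only succeeds for a namespace the logged-in account owns; a minimal sketch, assuming the glootz Docker Hub account implied by the tag above:
docker login -u glootz    # prompts for the password or an access token
docker push glootz/bigwork:datas-v1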
Optional:
# Remove the container and image just created
docker stop bigwork
docker rm bigwork
docker images # find the image ID of the bigwork-related image
docker rmi <image id> -f # add "-f" if it cannot be removed otherwise
Pull the image again from the remote registry, then run three containers: one master and two slaves.
Container IPs are assigned in order (172.17.0.2, .3, .4), so start the master first.
docker pull glootz/bigwork:datas-v1
docker run --privileged=true -itd --name=master -h=master -p 50070:50070 \
-v /usr/local/docker/bigdatas/softs:/root/bigdatas/softs \
glootz/bigwork:datas-v1 /sbin/init
--
docker run ... --name=slave1 -h=slave1 ...
docker run ... --name=slave2 -h=slave2 ...
# 2023-5-9
# After the base Hadoop container is configured locally (it serves as a base image), docker commit creates a local image that can be used directly;
# push it to the remote registry only if needed;
# when running containers from the new image, the -v mount and -p port mapping need not be specified again
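For reference, a possible expansion of the abbreviated slave commands (an assumption: same options as the master run, minus -p and -v per the note above):
docker run --privileged=true -itd --name=slave1 -h=slave1 glootz/bigwork:datas-v1 /sbin/init
docker run --privileged=true -itd --name=slave2 -h=slave2 glootz/bigwork:datas-v1 /sbin/init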
Enter each of the three containers and check that /etc/hosts contains the three lines below; add any that are missing:
172.17.0.2 master
172.17.0.3 slave1
172.17.0.4 slave2
—
Set up passwordless SSH between the nodes:
a. master
# go to ~/.ssh/; create it if it does not exist
]# cd ~/.ssh/
]# ssh-keygen -t rsa
# accept all prompts with Enter
]# ls
id_rsa id_rsa.pub
]# cat id_rsa.pub >> authorized_keys
# copy to slave1 and slave2
]# scp authorized_keys slave1:~/.ssh/ # the target directory must already exist; a password is required
# copy to slave2 in the same way
]# ssh slave1 # should now log in without a password
========================
# passwordless login from slavex to master; again in slavex's ~/.ssh/ directory
]# ssh-keygen -t rsa # accept the prompts with Enter
]# mv id_rsa.pub id_rsa_slavex.pub
]# scp id_rsa_slavex.pub master:~/.ssh/
===================
# run on master:
]# cat id_rsa_slavex.pub >> authorized_keys
====================
# run on slavex (2023 note: SSH from slave to master may not be needed)
]# ssh master # passwordless login
----
# do the same for passwordless login between the slaves
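As an alternative to the manual scp/cat steps, ssh-copy-id (shipped with openssh-clients) appends the local public key to the remote authorized_keys and fixes permissions; it asks for the password once:
]# ssh-copy-id root@slave1
]# ssh-copy-id root@slave2
]# ssh-copy-id root@master   # when run from a slave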
========================================
Run Hadoop
1. If this is not the first run, delete the hadoop.tmp directory under $HADOOP_HOME (the hadoop.tmp.dir set in core-site.xml) and everything under logs/;
delete them on master, slave1, and slave2.
2. On master, go to $HADOOP_HOME/bin:
./hdfs namenode -format
--- then switch to $HADOOP_HOME/sbin:
./start-all.sh
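A quick check that the cluster came up (run on master after start-all.sh finishes):
jps                             # master should show NameNode, SecondaryNameNode, ResourceManager
ssh slave1 $JAVA_HOME/bin/jps   # each slave should show DataNode and NodeManager
hdfs dfsadmin -report           # both datanodes should be reported as live
# the NameNode web UI is reachable from the host through the -p mapping: http://<host ip>:50070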