
Hadoop Installation

GavinTan
DevOps Engineer

ZooKeeper cluster

zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/data/zookeeper
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1

## Metrics Providers
#
# https://prometheus.io Metrics Exporter
#metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider
#metricsProvider.httpPort=7000
#metricsProvider.exportJvmInfo=true
# use 0.0.0.0 for the entry of the current node
server.1=0.0.0.0:2888:3888
server.2=172.16.7.15:2888:3888
server.3=172.16.7.16:2888:3888

Create the myid file; its value must match the ID after server. in zoo.cfg

echo 1 > /data/zookeeper/myid
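
On the other two nodes the value written to myid must match their server.N ID; a quick sketch, assuming the same dataDir (/data/zookeeper) on every node:

# on 172.16.7.15 (server.2) and 172.16.7.16 (server.3) respectively
echo 2 > /data/zookeeper/myid
echo 3 > /data/zookeeper/myid

# after all nodes are started, verify the roles (one leader, two followers)
./bin/zkServer.sh status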

Hadoop HA cluster

Configuration files

core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
<description>NameNode address; with HA enabled this should be the HA nameservice name</description>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/data/hadoop</value>
<description>Root directory for Hadoop data; by default the NameNode and DataNode store their data here</description>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
<description>Buffer size used in sequence files, in bytes; the default is 4KB</description>
</property>
<property>
<name>hadoop.http.staticuser.user</name>
<value>bigdata</value>
<description>User name used when browsing HDFS from the web UI; set it to the same user that starts Hadoop so it has permission to access HDFS</description>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>172.16.7.14:2181,172.16.7.15:2181,172.16.7.16:2181</value>
<description>ZooKeeper cluster addresses</description>
</property>
</configuration>
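
Because fs.defaultFS points at the logical nameservice rather than a single host, clients address HDFS without having to know which NameNode is active; a quick check once the cluster is running:

# both forms resolve through the mycluster nameservice
./bin/hdfs dfs -ls /
./bin/hdfs dfs -ls hdfs://mycluster/
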
hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>3</value>
<description>Replication factor; the number of copies HDFS keeps of each block</description>
</property>
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
<description>HA nameservice name</description>
</property>
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
<description>List of NameNode IDs for this nameservice</description>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>172.16.7.14:8020</value>
<description>RPC address and IPC port of the NameNode process for nn1</description>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>172.16.7.15:8020</value>
<description>RPC address and IPC port of the NameNode process for nn2</description>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>172.16.7.14:9870</value>
<description>Web UI address of the NameNode for nn1</description>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>172.16.7.15:9870</value>
<description>Web UI address of the NameNode for nn2</description>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://172.16.7.14:8485;172.16.7.15:8485/mycluster</value>
<description>Where the NameNode metadata (edits) is stored on the JournalNodes</description>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data/hadoop/journaldata</value>
<description>Local disk directory where the JournalNode stores its data</description>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
<description>Enable automatic NameNode failover</description>
</property>
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
<description>Failover proxy provider class</description>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
<description>Fencing method used during failover to isolate the Active NameNode; sshfence SSHes to the Active NameNode and kills the process with fuser so that two Active NameNodes cannot coexist</description>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/bigdata/.ssh/id_rsa</value>
<description>sshfence requires passwordless SSH login; this is the SSH private key file to use</description>
</property>
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
<description>Timeout for the sshfence method, in milliseconds</description>
</property>
</configuration>
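
sshfence only works if the user running the NameNodes can SSH between the two NameNode hosts without a password, using the private key configured above; a sketch of setting that up for the bigdata user and the two NameNode IPs in this config:

# run as the bigdata user on each NameNode host
ssh-keygen -t rsa -f /home/bigdata/.ssh/id_rsa -N ""
ssh-copy-id bigdata@172.16.7.14
ssh-copy-id bigdata@172.16.7.15
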
mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<description>Runtime framework for executing MapReduce jobs; the default is local</description>
</property>
<property>
<name>mapreduce.admin.user.env</name>
<value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
<description>Environment variables for MapReduce admin/user tasks; if this is missing, MapReduce jobs may fail</description>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
<description>Environment variables for the MapReduce ApplicationMaster (AM); if this is missing, MapReduce jobs may fail</description>
</property>

</configuration>
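
Both properties above expand $HADOOP_HOME, so that variable must be defined in the environment Hadoop runs with; one option, a sketch assuming Hadoop is unpacked under /opt/hadoop, is to export it in etc/hadoop/hadoop-env.sh:

# etc/hadoop/hadoop-env.sh (paths are assumptions, adjust to the actual layout)
export JAVA_HOME=/usr/lib/jvm/java-1.8.0
export HADOOP_HOME=/opt/hadoop
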
yarn-site.xml
<configuration>

<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
<description>Auxiliary services; must be set to mapreduce_shuffle for MapReduce to run</description>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
<description>Hostname of the ResourceManager</description>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>172.16.7.14:8088</value>
<description>YARN web UI address</description>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
<description>Enable log aggregation; when enabled, logs are saved to HDFS</description>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>86400</value>
<description>How long aggregated logs are kept on HDFS, in seconds</description>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://172.16.7.14:19888/jobhistory/logs</value>
<description>URL of the log server used to view aggregated logs</description>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/tmp/logs</value>
<description>HDFS path where aggregated logs are stored</description>
</property>
</configuration>
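
With log aggregation enabled, container logs of a finished application can be fetched straight from HDFS with the yarn CLI instead of visiting each NodeManager; the application ID below is only a placeholder:

# fetch the aggregated logs of one application
./bin/yarn logs -applicationId application_1600000000000_0001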

Configure worker nodes (DataNode and NodeManager)

vim /etc/hadoop/workers
172.16.7.15
172.16.7.16
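
The workers file and the rest of the configuration should be identical on every node; a sketch of pushing it to the two workers, assuming the configuration lives in the same directory on each host:

# copy the configuration to each worker (destination path is an assumption)
scp /etc/hadoop/* 172.16.7.15:/etc/hadoop/
scp /etc/hadoop/* 172.16.7.16:/etc/hadoop/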

Startup

# Start the ZooKeeper cluster; run on every ZooKeeper node
./bin/zkServer.sh start

# Start the JournalNode process; run on every configured JournalNode host (here the NameNode machines)
./bin/hdfs --daemon start journalnode

# Format the NameNode; run on any one of the configured NameNodes, only needed the first time the cluster is started
./bin/hdfs namenode -format
./bin/hdfs --daemon start namenode

# Sync the NameNode metadata; run on the other NameNode(s) that were not formatted
./bin/hdfs namenode -bootstrapStandby

# Start the MapReduce JobHistory server
./bin/mapred --daemon start historyserver

# Initialize the HA state in ZooKeeper; run on any one of the configured NameNodes
./bin/hdfs zkfc -formatZK

# Start ZKFC
./bin/hdfs --daemon start zkfc

# Start all services
./sbin/start-all.sh
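
Once everything is up it is worth confirming which NameNode is active and that the expected daemons are running; a quick check using the NameNode IDs defined in hdfs-site.xml:

# show which NameNode is active and which is standby
./bin/hdfs haadmin -getServiceState nn1
./bin/hdfs haadmin -getServiceState nn2

# list the running Java daemons on the current host
jps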

Common operations

# Check cluster status
./bin/hdfs dfsadmin -report

# Stop a NameNode
./bin/hdfs --daemon stop namenode

# Run the wordcount example
./bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.1.jar wordcount /input /output
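
The example expects /input to already exist in HDFS and /output to not exist yet; a minimal preparation and result check, with the input files chosen purely for illustration:

# create some input before the job, then inspect the result afterwards
./bin/hdfs dfs -mkdir -p /input
./bin/hdfs dfs -put etc/hadoop/*.xml /input
./bin/hdfs dfs -cat /output/part-r-00000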

Ambari deployment (managing and monitoring Hadoop)

Build

yum install maven rpm-build

wget https://www-eu.apache.org/dist/ambari/ambari-2.7.5/apache-ambari-2.7.5-src.tar.gz
tar xfvz apache-ambari-2.7.5-src.tar.gz
cd apache-ambari-2.7.5-src
mvn versions:set -DnewVersion=2.7.5.0.0

pushd ambari-metrics
mvn versions:set -DnewVersion=2.7.5.0.0
popd

#RHEL (CentOS 7) & SUSE (SLES 12 SP2 & SP3)
mvn -B clean install rpm:rpm -DnewVersion=2.7.5.0.0 -DbuildNumber=5895e4ed6b30a2da8a90fee2403b6cab91d19972 -DskipTests -Dpython.ver="python >= 2.6" -Drat.skip=true

#Ubuntu/Debian
mvn -B clean install jdeb:jdeb -DnewVersion=2.7.5.0.0 -DbuildNumber=5895e4ed6b30a2da8a90fee2403b6cab91d19972 -DskipTests -Dpython.ver="python >= 2.6" -Drat.skip=true

Install

#RHEL (CentOS 7) & SUSE (SLES 12 SP2 & SP3)
yum install ambari-server/target/rpm/ambari-server/RPMS/noarch/ambari-server*.rpm

yum install ambari-agent/target/rpm/ambari-agent/RPMS/x86_64/ambari-agent*.rpm

#Ubuntu/Debian
apt-get install ./ambari-server/target/ambari-server*.deb

apt-get install ./ambari-agent/target/ambari-agent*.deb

Setup and start

ambari-server setup

ambari-server start
ambari-agent start
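
Each agent has to know where the Ambari server is before it starts; a sketch assuming the default package layout, where the server hostname is set in the agent's ini file:

# /etc/ambari-agent/conf/ambari-agent.ini (default location for the packaged agent)
[server]
hostname=<ambari-server-host>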

Access

http://<ambari-server-host>:8080   (default login: admin/admin)
Tip

If the Bower install step fails with an "Unexpected token" error: [ERROR] Failed to execute goal org.codehaus.mojo:exec-maven-plugin:1.2.1:exec (Bower install) on project ambari-admin: Command execution failed. Process exited with an error: 1 (Exit value: 1) -> [Help 1]

vi ambari-admin/pom.xml
<argument>${basedir}/src/main/resources/ui/admin-web/node_modules/bower/bin/bower</argument>
change it to
<argument>bower</argument>
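
Using a plain bower command assumes bower is available on the PATH of the build machine; if it is not, it can be installed globally with npm first (an assumption about the build environment, not part of the original fix):

# make the plain "bower" argument resolvable
npm install -g bower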