Big Data: HDFS + Hive + HBase + Kudu + Presto Cluster (6 Nodes)

Download links for the main components:
presto https://prestosql.io/docs/current/index.html
kudu (RPM packages) https://github.com/MartinWeindel/kudu-rpm/releases
hive http://mirror.bit.edu.cn/apache/hive/
hdfs http://archive.apache.org/dist/hadoop/core/
hbase https://hbase.apache.org/downloads.html
1. Environment preparation

Machine layout:

| IP           | kudu    | presto      | hdfs              | hive | zk | kafka | hbase          | hostname | CPU | Memory |
|--------------|---------|-------------|-------------------|------|----|-------|----------------|----------|-----|--------|
| 192.168.1.10 | master  | coordinator | namenode/datanode |      |    |       | master+phoenix | server1  | 8c  | 32g    |
| 192.168.1.11 | master  | worker      | datanode/yarn     |      |    |       | regionserver   | server2  | 8c  | 32g    |
| 192.168.1.12 | master  | worker      | datanode          |      |    |       | regionserver   | server3  | 8c  | 32g    |
| 192.168.1.13 | tserver | worker      | datanode          |      | zk | kafka | regionserver   | server4  | 8c  | 32g    |
| 192.168.1.14 | tserver | worker      | datanode          |      | zk | kafka | regionserver   | server5  | 8c  | 32g    |
| 192.168.1.15 | tserver | worker      | datanode          | hive | zk | kafka | regionserver   | server6  | 8c  | 32g    |

Service ports and versions:

| Service              | Port    | Version    |
|----------------------|---------|------------|
| hdfs                 | xx,xx   | 3.1.2      |
| yarn                 | xxxx    | 3.1.2      |
| presto               | xxxx    | 337        |
| kudu                 | xxxx    | 1.10.0     |
| zk                   | xxxx    | 3.5.5      |
| kafka                | xxxx    | 2.12-2.2.1 |
| hive                 | xxxx    | 3.1.2      |
| hbase                | xxxx    | 2.0.6      |
| hbase client         | phoenix | 5.0.0      |

SaltStack is used for batch operations across the nodes.

*Note: basic OS tuning, the Java environment, passwordless SSH, and the SaltStack installation are assumed to be done already (omitted here).

 

Configure hosts
vi /etc/hosts
192.168.1.10     server1
192.168.1.11     server2
192.168.1.12     server3
192.168.1.13     server4
192.168.1.14     server5
192.168.1.15     server6
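Since SaltStack is already in place, the same hosts file can be pushed to every node and then spot-checked in one pass. A minimal sketch, assuming a prepared hosts file has been placed on the salt master as salt://hosts (that source path and the '*' target are assumptions, not part of the original setup):

# push the prepared hosts file to all minions
salt '*' cp.get_file salt://hosts /etc/hosts
# verify that every node resolves the cluster hostnames
salt '*' cmd.run 'getent hosts server1 server6'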

2. HDFS installation
2.0. Configure system environment variables in /etc/profile
JAVA_HOME=/data/jdk1.8.0_211
JRE_HOME=/data/jdk1.8.0_211/jre
CLASS_PATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JRE_HOME/lib
export JAVA_HOME JRE_HOME CLASS_PATH PATH
export JAVA_HOME=/data/jdk1.8.0_211
export HADOOP_HOME=/data/hadoop-3.1.2
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
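
After editing /etc/profile, a quick sanity check confirms the variables took effect. A minimal sketch, assuming the JDK and Hadoop tarballs are already unpacked under /data as configured above:

source /etc/profile
java -version      # expect 1.8.0_211
hadoop version     # expect Hadoop 3.1.2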

2.1. hadoop-env.sh configuration
export JAVA_HOME=/data/jdk1.8.0_211
export HADOOP_HOME=/data/hadoop-3.1.2
export PATH=$PATH:/data/hadoop-3.1.2/bin
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib:$HADOOP_COMMON_LIB_NATIVE_DIR"
 
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export HDFS_ZKFC_USER=root
export HDFS_JOURNALNODE_USER=root
2.3. core-site.xml configuration

   
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://server1:8020</value>
    </property>
    <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/data/hadoop-3.1.2/tmp</value>
    </property>
    <property>
        <name>fs.trash.interval</name>
        <value>60</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>*</value>
    </property>
</configuration>

2.4. hdfs-site.xml configuration

<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/data/hadoop-3.1.2/data/dfs/data</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/data/hadoop-3.1.2/dfs/name</value>
    </property>
    <property>
        <name>dfs.http.address</name>
        <value>server1:50070</value>
    </property>
</configuration>

2.5. Add the following at the top of /data/hadoop-3.1.2/sbin/start-dfs.sh and stop-dfs.sh
HDFS_DATANODE_USER=root
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs

After the configuration is done, create the following users on every server:
useradd -m hadoop -G root -s /bin/bash
useradd -m hdfs -G root -s /bin/bash
useradd -m yarn -G root -s /bin/bash

Change the shell exec user to avoid errors when the NameNode is repeatedly started and stopped:
vi /data/hadoop-3.1.2/bin/hdfs
HADOOP_SHELL_EXECNAME="root"

3. YARN installation

3.0. Add the following at the top of /data/hadoop-3.1.2/sbin/start-yarn.sh and stop-yarn.sh
 YARN_RESOURCEMANAGER_USER=root
 HADOOP_SECURE_DN_USER=yarn
 YARN_NODEMANAGER_USER=root
3.1. mapred-site.xml configuration

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>server2:10030</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.joblist.cache.size</name>
        <value>10010</value>
        <description>default 20000</description>
    </property>
</configuration>

3.2. yarn-site.xml configuration

<configuration>
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>server2</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>server2:8888</value>
    </property>
</configuration>

3.3. hadoop-env.sh configuration
 #YARN
 export YARN_RESOURCEMANAGER_USER=root
 export HDFS_DATANODE_SECURE_USER=yarn
 export YARN_NODEMANAGER_USER=root


3.4. workers configuration
server1
server2
server3
server4
server5
server6
3.5. Distribute the configuration files to the corresponding nodes
salt -N 'hdfs' cp.get_file salt://xxx /data/hadoop-3.1.2/xxx
3.6. Start
Format the NameNode:
hadoop namenode -format
Start all services:
start-all.sh
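
Once start-all.sh returns, it is worth confirming that the daemons really came up before installing anything on top. A minimal check, using only standard Hadoop commands and the existing salt targets:

# list Java daemons on every node; expect NameNode/DataNode/ResourceManager/NodeManager per the layout table
salt '*' cmd.run 'jps'
# HDFS view: all six datanodes should be reported as live
hdfs dfsadmin -report
# YARN view: all nodemanagers should be registered
yarn node -list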

 

4. Hive installation
4.1.0 hive-site.xml configuration

<configuration>
    <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>/user/hive/warehouse</value>
    </property>
    <property>
        <name>hive.exec.mode.local.auto</name>
        <value>true</value>
        <description>Let Hive determine whether to run in local mode automatically</description>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://x.x.x.x:3306/hive?createDatabaseIfNotExist=true</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>hive</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>123456</value>
    </property>
    <property>
        <name>hive.cli.print.header</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.cli.print.current.db</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.server2.authentication</name>
        <value>NONE</value>
    </property>
</configuration>

4.1.1 hive-env.sh configuration
HADOOP_HOME=/data/hadoop-3.1.2
export HIVE_CONF_DIR=/data/hive-3.1.2/conf
export HIVE_AUX_JARS_PATH=/data/hive-3.1.2/lib

4.1.2 Distribute the configuration files
cp mysql-connector-java-5.1.46.jar /data/hive-3.1.2/lib
salt 'server6' cp.get_file salt://xxx /data/hive-3.1.2/conf/xxx
4.1.3 Create the directories in HDFS
hadoop fs -mkdir -p /user/hive/warehouse
hadoop fs -chmod g+w /user/hive/warehouse
hadoop fs -mkdir -p /tmp
hadoop fs -chmod g+w /tmp

4.1.4 Initialize and start the services
Install MySQL (MariaDB) on the master node:
yum install -y mariadb-server
systemctl start mariadb
systemctl enable mariadb
Initialize MySQL:
mysql_secure_installation
Create the Hive metastore database:
create database hive character set utf8;
CREATE USER 'hive'@'%' IDENTIFIED BY '123456';
GRANT ALL PRIVILEGES ON *.* TO 'hive'@'%';
FLUSH PRIVILEGES;
# Initialize the Hive metastore schema; check whether MySQL is case-sensitive before running
schematool -dbType mysql -initSchema
# Start the two Hive services
nohup /data/hive-3.1.2/bin/hive --service metastore -p 9083 &
nohup /data/hive-3.1.2/bin/hive --service hiveserver2 &
# Stop the two Hive services
kill -9 `ps -ef|grep hive|grep -v grep|awk '{print $2}'`
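
With the metastore and HiveServer2 running, a quick Beeline round trip catches connection or proxy-user problems early. A hedged sketch: it assumes HiveServer2 listens on its default port 10000 on server6 (where Hive sits in the layout table) and that the root proxy-user settings from core-site.xml are active:

/data/hive-3.1.2/bin/beeline -u jdbc:hive2://server6:10000 -n root -e 'show databases;'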

 
5. Kudu installation
5.1 # Distribute the installation packages to the servers
salt -N 'kudu' cp.get_file salt://kudu-1.10.0-1.x86_64.rpm /data/kudu-1.10.0-1.x86_64.rpm
salt -N 'kudu' cp.get_file salt://cyrus-sasl-gssapi-2.1.26-23.el7.x86_64.rpm /data/cyrus-sasl-gssapi-2.1.26-23.el7.x86_64.rpm
salt -N 'kudu' cp.get_file salt://cyrus-sasl-plain-2.1.26-23.el7.x86_64.rpm /data/cyrus-sasl-plain-2.1.26-23.el7.x86_64.rpm
# Install these two dependencies first, otherwise the install will fail
salt -N 'kudu' cmd.run 'cd /data&&rpm -ivh cyrus-sasl-gssapi-2.1.26-23.el7.x86_64.rpm'
salt -N 'kudu' cmd.run 'cd /data&&rpm -ivh cyrus-sasl-plain-2.1.26-23.el7.x86_64.rpm'

5.2 # Run the installation
salt -N 'kudu' cmd.run 'cd /data&&rpm -ivh kudu-1.10.0-1.x86_64.rpm'
# Distribute the configuration files
salt -N 'km' cp.get_file salt://master.gflagfile /etc/kudu/conf/master.gflagfile
salt -N 'kt' cp.get_file salt://tserver.gflagfile /etc/kudu/conf/tserver.gflagfile
# Create the data directories
salt -N 'km' cmd.run 'mkdir -p /data/kudu/master/logs /data/kudu/master/wals /data/kudu/master/data'
salt -N 'kt' cmd.run 'mkdir -p /data/kudu/tserver/logs /data/kudu/tserver/data /data/kudu/tserver/wals'
# Fix the directory ownership
salt -N 'kudu' cmd.run 'chown -R kudu:kudu /data/kudu'
 
5.3 Start kudu-master
salt -N 'km' cmd.run 'systemctl start kudu-master'
# Check the service on each master node
salt -N 'km' cmd.run 'ps -ef|grep kudu'
# Start the tservers
salt -N 'kt' cmd.run 'systemctl start kudu-tserver'
# Check the service on each tserver node
salt -N 'kt' cmd.run 'ps -ef|grep kudu'
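
The kudu CLI installed by the RPM can check overall cluster health once the masters and tservers are up. A minimal sketch, assuming the three masters run on server1-3 and listen on the default port 7051 (matching the kudu.properties further below):

# every master and tablet server should report OK
kudu cluster ksck server1:7051,server2:7051,server3:7051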

6. Presto 337 installation
salt -N 'pt' cp.get_file salt://jdk-11.0.6_linux-x64_bin.tar.gz /data/jdk-11.0.6_linux-x64_bin.tar.gz
salt -N 'pt' cmd.run 'cd /data&&tar xf jdk-11.0.6_linux-x64_bin.tar.gz'

6.1. Configuration files:
config.properties
Coordinator node:
coordinator=true
node-scheduler.include-coordinator=false
http-server.http.port=8080
discovery-server.enabled=true
discovery.uri=http://192.168.1.10:8080
query.max-memory=25GB
query.max-memory-per-node=5GB
query.max-total-memory-per-node=6GB
query.max-run-time=900s

Worker nodes:
coordinator=false
http-server.http.port=8080
discovery.uri=http://192.168.1.10:8080
query.max-memory=25GB
query.max-memory-per-node=5GB

# Reserve roughly 30% of memory for the OS and for load spikes
jvm.config
-server
-Xmx20G
-XX:+UseG1GC
-XX:G1HeapRegionSize=32M
-XX:+UseGCOverheadLimit
-XX:+ExplicitGCInvokesConcurrent
-XX:+HeapDumpOnOutOfMemoryError
-XX:OnOutOfMemoryError=kill -9 %p
-XX:+CMSClassUnloadingEnabled
-XX:+AggressiveOpts
-DHADOOP_USER_NAME=root
-Djdk.attach.allowAttachSelf=true
 

log.properties
io.prestosql=INFO

node.properties (note: node.id must be unique on every node)
node.environment=presto
node.id=node_coordinator_10
node.data-dir=/data/presto-data

Connector configuration files under the catalog directory:
hive.properties
connector.name=hive-hadoop2
hive.metastore.uri=thrift://192.168.1.16:9083
hive.config.resources=/data/hadoop-3.1.2/etc/hadoop/core-site.xml,/data/hadoop-3.1.2/etc/hadoop/hdfs-site.xml

kudu.properties

connector.name=kudu

## List of Kudu master addresses, at least one is needed (comma separated)
## Supported formats: example.com, example.com:7051, 192.0.2.1, 192.0.2.1:7051,
##                    [2001:db8::1], [2001:db8::1]:7051, 2001:db8::1
kudu.client.master-addresses=server1:7051,server2:7051,server3:7051

## Kudu does not support schemas, but the connector can emulate them optionally.
## By default, this feature is disabled, and all tables belong to the default schema.
## For more details see connector documentation.
kudu.schema-emulation.enabled=true

## Prefix to use for schema emulation (only relevant if `kudu.schema-emulation.enabled=true`)
## The standard prefix is `presto::`. Empty prefix is also supported.
## For more details see connector documentation.
##kudu.schema-emulation.prefix=

#######################
### Advanced Kudu Java client configuration
#######################

## Default timeout used for administrative operations (e.g. createTable, deleteTable, etc.)
#kudu.client.defaultAdminOperationTimeout = 30s

## Default timeout used for user operations
#kudu.client.defaultOperationTimeout = 30s

## Default timeout to use when waiting on data from a socket
#kudu.client.defaultSocketReadTimeout = 10s

## Disable Kudu client's collection of statistics.
#kudu.client.disableStatistics = false
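
With schema emulation enabled, Presto can create and partition Kudu tables directly through this catalog. A hedged sketch of the connector's CREATE TABLE form; kudu.default.events is a throwaway example name, not part of the original setup:

CREATE TABLE kudu.default.events (
  id BIGINT WITH (primary_key = true),
  ts TIMESTAMP,
  payload VARCHAR
) WITH (
  partition_by_hash_columns = ARRAY['id'],
  partition_by_hash_buckets = 6
);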

mongodb.properties
connector.name=mongodb
mongodb.seeds=192.168.1.17:27017
mongodb.credentials=root:admin@admin
#mongodb.socket-keep-alive=true

bin/launcher
Add the JDK 11 path ahead of the launcher command:

PATH=/data/jdk-11.0.6/bin/:$PATH
java -version
exec "$(dirname "$0")/launcher.py" "$@"

1. Distribute the package
salt -N 'pt' cp.get_file salt://presto-server-337.tar.gz /data/presto-server-337.tar.gz
2. Unpack
salt -N 'pt' cmd.run 'cd /data&&tar xf presto-server-337.tar.gz'
3. Distribute the configuration files (details omitted for confidentiality)
salt -N 'pt' cp.get_dir salt://etc /data/presto-server-337/
4. Create the data directory
salt -N 'pt' cmd.run 'mkdir -p /data/presto-data'
5. Make the launcher scripts executable
salt -N 'pt' cmd.run 'chmod +x /data/presto-server-337/bin/*'
6. Start the service
salt -N 'pt' cmd.run 'source /etc/profile&&/data/presto-server-337/bin/launcher start'
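
Once the launcher has started on every node, the coordinator's REST endpoint and, optionally, the standalone CLI can confirm that the workers registered and the catalogs load. A hedged sketch; the presto-cli executable jar is a separate download and its /data path here is an assumption:

# the coordinator should answer with node and version info
curl http://192.168.1.10:8080/v1/info
# list catalogs and the emulated Kudu schemas through the CLI
/data/presto-cli --server 192.168.1.10:8080 --execute 'show catalogs;'
/data/presto-cli --server 192.168.1.10:8080 --catalog kudu --execute 'show schemas;'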

7. HBase 2.0 installation
7.1.0 hbase-env.sh configuration
export JAVA_HOME=/data/jdk1.8.0_211/
export HBASE_LOG_DIR=${HBASE_Data}/logs
export HBASE_MANAGES_ZK=false
# JVM memory settings; adjust to the hardware
export HBASE_OPTS="-XX:+UseConcMarkSweepGC"
export HBASE_HEAPSIZE=8G
export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -Xmx4g -Xms4g"
export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -Xms8g -Xmx8g"

7.1.1 hbase-site.xml configuration

<configuration>
    <property>
        <name>hbase.master.info.port</name>
        <value>16000</value>
    </property>
    <property>
        <name>hbase.tmp.dir</name>
        <value>/data/hbase-2.0.6/tmp</value>
    </property>
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://server1:8020/hbase</value>
    </property>
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>server4,server5,server6:2181</value>
    </property>
    <property>
        <name>hbase.zookeeper.property.dataDir</name>
        <value>/data/hbase-2.0.6/data</value>
    </property>
    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
    </property>
    <property>
        <name>hbase.unsafe.stream.capability.enforce</name>
        <value>false</value>
    </property>
    <property>
        <name>zookeeper.znode.parent</name>
        <value>/hbase</value>
    </property>
    <property>
        <name>phoenix.schema.isNamespaceMappingEnabled</name>
        <value>true</value>
    </property>
    <property>
        <name>phoenix.schema.mapSystemTablesToNamespace</name>
        <value>true</value>
    </property>
</configuration>

7.1.2 regionservers configuration
server1
server2
server3
server4
server5
server6

7.1.3 Distribute the configuration files
salt -N 'hbase' cp.get_file salt://xxx /data/hbase-2.0.6/xxx
salt -N 'hbase' cmd.run 'cd /data/hbase-2.0.6/conf&&ln -s /data/hadoop-3.1.2/etc/hadoop/hdfs-site.xml'
salt -N 'hbase' cmd.run 'cd /data/hbase-2.0.6/conf&&ln -s /data/hadoop-3.1.2/etc/hadoop/core-site.xml'
salt -N 'hbase' cmd.run 'mkdir /data/hbase-2.0.6/{data,logs,tmp}'

Start HBase with start-hbase.sh
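
A quick status query through the HBase shell confirms that the master and all six regionservers registered. A minimal sketch using the shell's standard non-interactive mode:

echo "status 'simple'" | /data/hbase-2.0.6/bin/hbase shell -n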

7.1.4 Install Phoenix 5.0.0

Download and unpack on the master node.
Copy the server jar to every HBase node:
cp /data/phoenix-5.0.0/phoenix-5.0.0-HBase-2.0-server.jar /data/hbase-2.0.6/lib/

Confirm that hbase-site.xml on every node contains the phoenix.schema.isNamespaceMappingEnabled and phoenix.schema.mapSystemTablesToNamespace properties.

cp /data/hbase-2.0.6/conf/hbase-site.xml /data/phoenix-5.0.0/bin/

Enter the CLI:
/data/phoenix-5.0.0/bin/sqlline.py
!tables
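
Inside sqlline, a small round trip exercises the namespace mapping that was just enabled. A hedged sketch; the TEST schema and DEMO table are throwaway names, not part of the original setup:

CREATE SCHEMA IF NOT EXISTS TEST;
CREATE TABLE IF NOT EXISTS TEST.DEMO (ID BIGINT NOT NULL PRIMARY KEY, NAME VARCHAR);
UPSERT INTO TEST.DEMO VALUES (1, 'hello');
SELECT * FROM TEST.DEMO;
DROP TABLE TEST.DEMO;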

 

Source: https://blog.csdn.net/fly0512/article/details/100863889

 

 

 
