Hue 工具使用


Hue 是一个基于 Web 界面的 Hadoop 数据分析工具,由 Cloudera 公司开源

官方网址

Github 地址 -> 安装方法

文档地址

一.Build

1.Ubuntu 安装所需环境(以 GitHub 上的说明为准)

# JDK
# maven
# 其他环境
$ sudo apt-get install git ant gcc g++ libffi-dev libkrb5-dev libmysqlclient-dev libsasl2-dev libsasl2-modules-gssapi-mit libsqlite3-dev libssl-dev libxml2-dev libxslt-dev make maven libldap2-dev python-dev python-setuptools libgmp3-dev

2.build

$ make apps

二.配置

1.基础配置(位于官方文档3.1节)

secret_key=jFE93j;2[290-eiw.KEiwN2s3['d;/.q[eIW^y#e=+Iei*@Mn<qW5o

http_host=cen-ubuntu
http_port=8888

time_zone=Asia/Shanghai

2.WebHDFS 配置

# hdfs-site.xml(默认为true)
<property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
</property>

# core-site.xml 配置代理
<property>
    <name>hadoop.proxyuser.hue.hosts</name>
    <value>*</value>
</property>
<property>
    <name>hadoop.proxyuser.hue.groups</name>
    <value>*</value>
</property>

# hue.ini 配置 3 处,若配置 HA 需要配置 logical_name 
[hadoop]

  # Configuration for HDFS NameNode
  # ------------------------------------------------------------------------
  [[hdfs_clusters]]
    # HA support by using HttpFs

    [[[default]]]
      # Enter the filesystem uri
      fs_defaultfs=hdfs://cen-ubuntu:8020

      # NameNode logical name.
      ## logical_name=

      # Use WebHdfs/HttpFs as the communication mechanism.
      # Domain should be the NameNode or HttpFs host.
      # Default port is 14000 for HttpFs.
      webhdfs_url=http://cen-ubuntu:50070/webhdfs/v1

      # Change this if your HDFS cluster is Kerberos-secured
      ## security_enabled=false

      # In secure mode (HTTPS), if SSL certificates from YARN Rest APIs
      # have to be verified against certificate authority
      ## ssl_cert_ca_verify=True

      # Directory of the Hadoop configuration
      hadoop_conf_dir=/opt/cdh5.3.6/hadoop-2.6.0-cdh5.12.0/etc/hadoop

3.YARN 配置

# hue.ini
[[yarn_clusters]]

  [[[default]]]
    # Enter the host on which you are running the ResourceManager
    resourcemanager_host=cen-ubuntu

    # The port where the ResourceManager IPC listens on
    resourcemanager_port=8032

    # Whether to submit jobs to this cluster
    submit_to=True

    # Resource Manager logical name (required for HA)
    ## logical_name=

    # Change this if your YARN cluster is Kerberos-secured
    ## security_enabled=false

    # URL of the ResourceManager API
    resourcemanager_api_url=http://cen-ubuntu:8088

    # URL of the ProxyServer API
    proxy_api_url=http://cen-ubuntu:8088

    # URL of the HistoryServer API
    history_server_api_url=http://cen-ubuntu:19888

    # URL of the Spark History Server
    ## spark_history_server_url=http://localhost:18088

    # In secure mode (HTTPS), if SSL certificates from YARN Rest APIs
    # have to be verified against certificate authority
    ## ssl_cert_ca_verify=True

4.临时文件目录

[filebrowser]
  # Location on local filesystem where the uploaded archives are temporary stored.
  archive_upload_tempdir=/tmp

5.Hive 配置(需要先启动 HiveServer2 服务和 Hive metastore 服务)

# hive-site.xml
<!-- 配置server2 的地址和端口 -->
<property>
  <name>hive.server2.thrift.port</name>
  <value>10000</value>
  <description>Port number of HiveServer2 Thrift interface when hive.server2.transport.mode is 'binary'.</description>
</property>

<property>
  <name>hive.server2.thrift.bind.host</name>
  <value>cen-ubuntu</value>
  <description>Bind host on which to run the HiveServer2 Thrift service.</description>
</property>

# 启动hiveserver2
$ bin/hiveserver2 

# hive-site.xml
<!-- 配置远程 remote metastore 的uri 见hive官方文档-->
<property>
  <name>hive.metastore.uris</name>
  <value>thrift://cen-ubuntu:9083</value>
</property>
  
# 启动 metastore server
hive --service metastore

# hue.ini
[beeswax]

  # Host where HiveServer2 is running.
  # If Kerberos security is enabled, use fully-qualified domain name (FQDN).
  hive_server_host=cen-ubuntu

  # Port where HiveServer2 Thrift server runs on.
  hive_server_port=10000

  # Hive configuration directory, where hive-site.xml is located
  hive_conf_dir=/opt/cdh5.3.6/hive-1.1.0-cdh5.12.0/conf

  # Timeout in seconds for thrift calls to Hive service
  server_conn_timeout=120

6.database 连接管理关系型数据库(SQLite3 是 Hue 自带的数据库)(注意:需要删除 [[[sqlite]]]、[[[mysql]]] 等小节标题前的注释符号 ##,否则其下的配置项不会归属到对应的数据库小节)

###########################################################################
# Settings for the RDBMS application
###########################################################################

[librdbms]
  # The RDBMS app can have any number of databases configured in the databases
  # section. A database is known by its section name
  # (IE sqlite, mysql, psql, and oracle in the list below).

  [[databases]]
    # sqlite configuration.
    [[[sqlite]]]
      # Name to show in the UI.
      nice_name=SQLite

      # For SQLite, name defines the path to the database.
      name=/opt/cdh5.3.6/hue-3.9.0-cdh5.12.0/desktop/desktop.db

      # Database backend to use.
      engine=sqlite

      # Database options to send to the server when connecting.
      # https://docs.djangoproject.com/en/1.4/ref/databases/
      ## options={}

    # mysql, oracle, or postgresql configuration.
    [[[mysql]]]
      # Name to show in the UI.
      nice_name="My SQL DB"

      # For MySQL and PostgreSQL, name is the name of the database.
      # For Oracle, Name is instance of the Oracle server. For express edition
      # this is 'xe' by default.
      name=mysqldb

      # Database backend to use. This can be:
      # 1. mysql
      # 2. postgresql
      # 3. oracle
      engine=mysql

      # IP or hostname of the database to connect to.
      host=cen-ubuntu

      # Port the database server is listening to. Defaults are:
      # 1. MySQL: 3306
      # 2. PostgreSQL: 5432
      # 3. Oracle Express Edition: 1521
      port=3306

      # Username to authenticate with when connecting to the database.
      user=root

      # Password matching the username to authenticate with when
      # connecting to the database.
      password=ubuntu

      # Database options to send to the server when connecting.
      # https://docs.djangoproject.com/en/1.4/ref/databases/
      ## options={}

7.Oozie 配置

[liboozie]
  # The URL where the Oozie service runs on. This is required in order for
  # users to submit jobs. Empty value disables the config check.
  oozie_url=http://cen-ubuntu:11000/oozie

  # Requires FQDN in oozie_url if enabled
  ## security_enabled=false

  # Location on HDFS where the workflows/coordinator are deployed when submitted.
  remote_deployement_dir=/user/cen/examples/apps

  [oozie]
    # Location on local FS where the examples are stored.
    local_data_dir=/opt/cdh5.3.6/oozie-4.1.0-cdh5.12.0/examples

    # Location on local FS where the data for the examples is stored.
    sample_data_dir=/opt/cdh5.3.6/oozie-4.1.0-cdh5.12.0/examples/input-data

    # Location on HDFS where the oozie examples and workflows are stored.
    # Parameters are $TIME and $USER, e.g. /user/$USER/hue/workspaces/workflow-$TIME
    remote_data_dir=/user/cen/examples/apps/

三.运行

# 0.0.0.0 表示允许任意 IP 访问;监听地址本应在 hue.ini 中配置,但该配置未生效,因此在启动命令中手动指定(注意:这里使用的端口是 8000,而不是上文 hue.ini 中配置的 http_port=8888)
$ build/env/bin/hue runserver 0.0.0.0:8000
posted @ 2017-07-31 02:58  岑忠满  阅读(6630)  评论(0)  编辑  收藏  举报