elasticsearch.yml配置内容详解

在实际的生产环境中，可以根据需要相应修改es的配置内容。

# --- Identity and roles of this node ---
cluster.name: elasticsearch
node.name: bigdata01
node.master: true
node.data: true
# Maximum number of ES nodes that may be started on the same server.
node.max_local_storage_nodes: 10

# --- Network endpoints ---
network.host: 172.16.58.21
http.port: 9200
transport.tcp.port: 9300

# --- Discovery ---
# Turn off multicast discovery.
discovery.zen.ping.multicast.enabled: false
# Hosts this node pings periodically. Only master node addresses should be
# listed: per the original notes, listing a non-master address can make the
# node fail at startup with MasterNotDiscoveredException and never join the
# cluster.
discovery.zen.ping.unicast.hosts: ["172.16.58.21:9300"]

# Fault-detection pings between nodes (defaults as quoted in the original notes).
# Interval between pings; default 1s.
discovery.zen.fd.ping_interval: 1s
# Longest time to wait for a response to each ping of the master; default 30s.
discovery.zen.fd.ping_timeout: 30s
# Number of failed pings tolerated before the other node is considered to
# have stopped working; default 3.
discovery.zen.fd.ping_retries: 6

# Filesystem locations used by this node.
# Data directory.
path.data: /opt/es/data
# Log directory.
path.logs: /opt/es/logs
# Plugin directory.
path.plugins: /opt/es/plugins

# Minimum number of nodes required to form a cluster. The value 1 only suits a
# cluster with a single master-eligible node. With N master-eligible nodes,
# use a majority of (N / 2) + 1 (integer division) to prevent split brain,
# e.g. 2 for N = 3 and 3 for N = 4. The earlier formula (N + 1) / 2 yields
# only N / 2 for even N, which is not a majority.
discovery.zen.minimum_master_nodes: 1

# Index-level defaults.
# Default number of shards per index.
index.number_of_shards: 16
# Default number of replicas.
index.number_of_replicas: 2

# Enable cross-origin resource sharing (default false), so that HTTP requests
# coming from outside the cluster, e.g. from browser pages, are accepted.
http.cors.enabled: true
# Origins allowed to issue cross-origin requests. "*" matches any origin; the
# match is made against the request's Origin header, not the client IP.
# NOTE(review): a wildcard origin is a security risk on a production cluster;
# consider restricting it to the origins that actually need access.
http.cors.allow-origin: "*"

# Lock the ES process memory at node startup so that the heap sized by
# ES_HEAP_SIZE stays resident in RAM instead of being swapped out.
# NOTE(review): newer 2.x releases name this setting bootstrap.memory_lock;
# confirm which key your ES version expects.
bootstrap.mlockall: true

# Whether deleting all indices in one request is forbidden. With false, as set
# here, delete-all is NOT forbidden; set it to true to forbid it.
# NOTE(review): the original note read "forbid deleting indices", which
# contradicts the value false; confirm the intended behavior.
action.disable_delete_all_indices: false

# Number of requests that may wait in the node's queue when no more threads
# are available to process bulk requests.
# Note: a bulk request that carries data for N shards occupies N slots in the
# queue, so even for a single bulk request queue_size should exceed N. If M
# such bulk requests are submitted at a time, queue_size should be greater
# than M * N.
# When the queue is full, further requests fail with RemoteTransportException;
# if the client does not handle that exception, the batch of data is lost.
# The queue consumes JVM heap, so size it according to the actual hardware.
threadpool.bulk.queue_size: 3000

# --- Translog / flush ---
# Size of buffered transaction data at which a commit (flush) is performed and
# the index is updated; default 512m per the original notes.
index.translog.flush_threshold_size: 1g
# How often to check whether the written data has reached the flush size;
# default 5s.
index.translog.interval: 10s
# How often a flush is performed regardless of size.
# NOTE(review): the original notes give a default of 60m; the ES reference
# documentation lists 30m. Verify for your version.
index.translog.flush_threshold_period: 60m

# --- Refresh and indexing buffer ---
# Index refresh interval; default 1s. Increase it when real-time visibility of
# new data does not matter; -1 disables refreshing.
index.refresh_interval: 1s
# Memory buffer each node may use while writing index data.
indices.memory.index_buffer_size: 20%

# --- HTTP payload ---
# Maximum content length of one HTTP request. A larger value lets one request
# carry more data, reducing the request count and raising write throughput.
http.max_content_length: 300M

# --- Store throttling and compression ---
# Store throttle. Set to none during large imports to lift the limit and use
# as much of the disk I/O capacity as possible.
indices.store.throttle.type: merge
# Maximum I/O throughput per second. Per the original notes: 180-200M for SSD,
# 40M for HDD (close to the maximum transfer rate of both on a SATA 2.0 link).
indices.store.throttle.max_bytes_per_sec: 100M
# Store imported data compressed, to shrink the index and speed up retrieval.
index.store.compress.stored: true

# Filter cache: memory used to cache filter results. The original notes
# recommend no less than 10G and no more than 30%.
indices.cache.filter.size: 20%
# Expiry time of filter cache entries.
indices.cache.filter.expire: 5m

# Query cache: memory used to cache query results. It is a shard-level cache
# and should stay small; 1% to 3% is recommended.
# (The key was previously misspelled "indices.cache.qeury.size", so this limit
# was presumably never applied.)
indices.cache.query.size: 3%
# Expiry time of query cache entries.
indices.cache.query.expire: 5m

# Fielddata cache, used for sorting and filtering. The original notes
# recommend no less than 10g, i.e. 10% to 30% of memory.
indices.fielddata.cache.size: 30%
# --- Segment merging ---
# Maximum number of segments allowed in one merge operation; default 30.
index.merge.policy.max_merge_at_once_explicit: 50
# Maximum number of threads used for merge operations.
index.merge.scheduler.max_thread_count: 49

# --- Mapping synchronisation ---
# Per the original notes: when the master sends an index request to a node,
# the node updates its own mapping table and sends the new mapping back to the
# master, which then updates its stored copy to stay consistent. The data
# format of an index is usually fixed, so this constant synchronisation is not
# needed and false speeds up indexing. Enable it again before the structure of
# the indexed data changes, so that the mappings on all nodes stay consistent.
indices.cluster.send_refresh_mapping: false

# Disk-aware shard allocation. When true, disk space is taken into account
# when shards are assigned to a node, and the watermark.low and watermark.high
# settings are checked.
cluster.routing.allocation.disk.threshold_enabled: true

# Low watermark, given as a disk-usage ratio or as remaining free space. Once
# it is reached, ES stops allocating new shards to the node. 1g means "less
# than 1g of free disk space left"; a ratio such as .97 means "disk usage
# above 97%".
cluster.routing.allocation.disk.watermark.low: 1g

# High watermark, in the same formats. Once it is reached, ES starts moving
# shards. 1g means "less than 1g of free disk space left"; a ratio such as .99
# means "disk usage above 99%".
# NOTE(review): low and high are set to the same value here; confirm that is
# intended.
cluster.routing.allocation.disk.watermark.high: 1g
# --- Recovery concurrency ---
# Per the original notes: how many shards on one node may be recovering at any
# given time; default 4, suggested value about 80% of the CPU core count.
# Shard recovery is I/O intensive, so settle on a value through repeated
# testing.
cluster.routing.allocation.node_initial_primaries_recoveries: 10
cluster.routing.allocation.node_concurrent_recoveries: 16

# --- Recovery transfer limits ---
# Number of parallel streams used to transfer data to a replica shard during
# shard recovery.
indices.recovery.concurrent_streams: 8
# Maximum number of bytes transferred per second during shard recovery.
indices.recovery.max_bytes_per_sec: 100mb
# Per the original notes: during recovery, flush once the translog reaches
# this size (default 500m) ...
indices.recovery.translog_size: 10m
# ... or once this many operations have accumulated (default 5000).
# NOTE(review): both defaults are quoted from the original notes; verify them
# against the reference documentation for your ES version.
indices.recovery.translog_ops: 10000

# Interval before replicas are reallocated (after a node has left, going by
# the setting name).
index.unassigned.node_left.delayed_timeout: 10m
# Turn the cache on.
index.cache.query.enable: true

# Enable scripting: inline scripts in general, and inline Groovy scripts in
# aggregations. The canonical boolean true replaces the YAML 1.1 truthy "on",
# which generic YAML tooling may read differently.
script.inline: true
script.engine.groovy.inline.aggs: true

# ======================== Elasticsearch Configuration =========================
#
# NOTE: Elasticsearch comes with reasonable defaults for most settings.
# Before you set out to tweak and tune the configuration, make sure you
# understand what are you trying to accomplish and the consequences.
#
# The primary way of configuring a node is via this file. This template lists
# the most important settings you may want to configure for a production cluster.
#
# Please see the documentation for further information on configuration options:
# <http://www.elastic.co/guide/en/elasticsearch/reference/current/setup-configuration.html>
#
# ---------------------------------- Cluster -----------------------------------
#
# Use a descriptive name for your cluster:
# 集群名称，默认是elasticsearch
# cluster.name: my-application
#
# ------------------------------------ Node ------------------------------------
#
# Use a descriptive name for the node:
# 节点名称，默认从elasticsearch-2.4.3/lib/elasticsearch-2.4.3.jar!config/names.txt中随机选择一个名称
# node.name: node-1
#
# Add custom attributes to the node:
# 
# node.rack: r1
#
# ----------------------------------- Paths ------------------------------------
#
# Path to directory where to store the data (separate multiple locations by comma):
# 可以指定es的数据存储目录，默认存储在es_home/data目录下
# path.data: /path/to/data
#
# Path to log files:
# 可以指定es的日志存储目录，默认存储在es_home/logs目录下
# path.logs: /path/to/logs
#
# ----------------------------------- Memory -----------------------------------
# Lock the memory on startup:
# 锁定物理内存地址，防止elasticsearch内存被交换出去,也就是避免es使用swap交换分区
# bootstrap.memory_lock: true
#
# 确保ES_HEAP_SIZE参数设置为系统可用内存的一半左右
# Make sure that the `ES_HEAP_SIZE` environment variable is set to about half the memory
# available on the system and that the owner of the process is allowed to use this limit.
# 
# 当系统进行内存交换的时候，es的性能很差
# Elasticsearch performs poorly when the system is swapping the memory.
#
# ---------------------------------- Network -----------------------------------
#
#
# 为es设置ip绑定，默认是127.0.0.1，也就是默认只能通过127.0.0.1 或者localhost才能访问
# es1.x版本默认绑定的是0.0.0.0 所以不需要配置，但是es2.x版本默认绑定的是127.0.0.1，需要配置
# Set the bind address to a specific IP (IPv4 or IPv6):
#
# network.host: 192.168.0.1
#
#
# 为es设置自定义端口，默认是9200
# 注意：在同一个服务器中启动多个es节点的话，默认监听的端口号会自动加1：例如：9200，9201，9202...
# Set a custom port for HTTP:
#
# http.port: 9200
#
# For more information, see the documentation at:
# <http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-network.html>
#
# --------------------------------- Discovery ----------------------------------
#
# 当启动新节点时，通过这个ip列表进行节点发现，组建集群
# 默认节点列表：
# 127.0.0.1，表示ipv4的回环地址。
#	[::1]，表示ipv6的回环地址
#
# 在es1.x中默认使用的是组播(multicast)协议，默认会自动发现同一网段的es节点组建集群，
# 在es2.x中默认使用的是单播(unicast)协议，想要组建集群的话就需要在这指定要发现的节点信息了。
# 注意：如果是发现其他服务器中的es服务，可以不指定端口[默认9300]，如果是发现同一个服务器中的es服务，就需要指定端口了。
# Pass an initial list of hosts to perform discovery when new node is started:
# 
# The default list of hosts is ["127.0.0.1", "[::1]"]
#
# discovery.zen.ping.unicast.hosts: ["host1", "host2"]
#
# 通过配置这个参数来防止集群脑裂现象 (集群总节点数量/2)+1
# Prevent the "split brain" by configuring the majority of nodes (total number of nodes / 2 + 1):
#
# discovery.zen.minimum_master_nodes: 3
#
# For more information, see the documentation at:
# <http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-discovery.html>
#
# ---------------------------------- Gateway -----------------------------------
#
# Block initial recovery after a full cluster restart until N nodes are started:
# 一个集群中的N个节点启动后,才允许进行数据恢复处理，默认是1
# gateway.recover_after_nodes: 3
#
# For more information, see the documentation at:
# <http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-gateway.html>
#
# ---------------------------------- Various -----------------------------------
# 在一台服务器上禁止启动多个es服务
# Disable starting multiple nodes on a single system:
#
# node.max_local_storage_nodes: 1
#
# 设置是否可以通过正则或者_all删除或者关闭索引库，设置为true表示必须显式指定索引库名称（es2.x中该参数默认为false）
# 生产环境建议设置为true，删除索引库的时候必须显式指定名称，否则可能会误删其他索引库。
# Require explicit names when deleting indices:
#
# action.destructive_requires_name: true

posted @ 2018-12-28 15:24 HeCCXX 阅读(367) 评论(0) 收藏举报

刷新页面返回顶部

HeCCXX

https://github.com/HeCCXX/MyBlog/issues

elasticsearch.yml配置内容详解

公告