# Event sources for this pipeline: console, raw TCP, a local nginx access log, and a Kafka topic.
input {
  stdin {
    # Type tag; the value is arbitrary and only helps identify these events downstream.
    type => "system"
  }

  tcp {
    port  => 15000
    # Codec used to decode incoming data.
    codec => json
  }

  file {
    path => ["/var/log/nginx/access.log"]
    # The default is "end", which only picks up newly appended lines.
    start_position => "beginning"
  }

  kafka {
    bootstrap_servers => ["192.168.1.135:9092"]
    topics            => ["score_statistic"]
    group_id          => "dofun-score"
    auto_offset_reset => "earliest"
    consumer_threads  => 10
    codec => json {
      charset => "UTF-8"
    }
  }
}
# Event enrichment: parse the raw line, geolocate the client IP, decode the
# user agent, and set the event time from an epoch-milliseconds field.
filter {
  # Parse and structure the raw message with a regular-expression (grok) pattern.
  # Expected layout: timestamp|serverIp|clientIp|reqUrl|device||
  grok {
    match => { "message" => "%{DATA:timestamp}\|%{IP:serverIp}\|%{IP:clientIp}\|%{DATA:reqUrl}\|%{DATA:device}\|\|" }
    # Alternative pattern for lines such as: 55.3.244.1 GET /index.html 15824 0.043
    # match => { "message" => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}" }
    # Sample nginx access-log line for reference:
    # 203.208.60.97 - - [02/Feb/2020:21:14:50 +0800] "GET /robots.txt HTTP/1.1" 404 3650 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" "-"
    # Grok ships with predefined patterns; custom patterns can be defined as well.
  }

  # Add geographic location information for the client IP address.
  geoip {
    source => "clientIp"
    # Explicit target equal to the legacy default, so the result stays under "geoip"
    # and the setting is also present when ECS-compatibility mode requires one.
    target => "geoip"
    # Dynamic field name/value must be declared through add_field; a bare
    # `foo_%{clientIp} => ...` entry is not a valid plugin setting.
    add_field => { "foo_%{clientIp}" => "Hello world, from %{clientIp}" }
    # Adds several fields, including geoip => {country_name:,region_code,ip,country_code2,longitude,region_name,continent_code,city_name,location=>{lat,lon}}
  }

  # Parse client device information from the user-agent string.
  useragent {
    source => "device"
    target => "userDevice"
  }

  # Interpret "timeMillis" as epoch milliseconds. No target is set, so the date
  # filter writes to its default target field.
  # NOTE(review): "timeMillis" is not produced by the grok pattern above;
  # presumably it arrives in the JSON events from tcp/kafka - confirm.
  # The date plugin sits directly in this section: a `filter { }` section cannot
  # be nested inside another `filter { }`.
  date {
    match => [ "timeMillis", "UNIX_MS" ]
  }
}
# Event sinks: console dump, a formatted local file, and an Elasticsearch index.
output {
  # Print every event to the console using the rubydebug codec.
  stdout {
    codec => rubydebug {}
  }

  # Write one formatted line per event to a local file.
  file {
    path  => "/var/log/test/test1.log"
    codec => line {
      format => "custom format: %{message}"
    }
  }

  # Index events into Elasticsearch.
  elasticsearch {
    hosts => "192.168.9.69"
    index => "logstash_test"
  }
}