nginx匹配示例
nginx日志格式
'$remote_user [$time_local] $http_x_Forwarded_for $remote_addr $request $status $upstream_status'
'$http_x_forwarded_for'
'$upstream_addr '
'ups_resp_time: $upstream_response_time '
'request_time: $request_time';
nginx日志示例
- [09/May/2023:15:01:31 +0800] 11.20.1.30 38.34.246.127 GET / HTTP/1.1 200 -11.20.1.30- ups_resp_time: - request_time: 0.000
grok匹配
# Parse one nginx access-log line (log format and sample shown above) into named fields.
#
# Each "(?:%{PATTERN:field}|-)" alternation accepts either a real value or the "-"
# placeholder nginx writes for an empty variable. When the literal "-" branch
# matches, nothing is captured and the field is not added to the event.
#
# The log format emits $upstream_status, $http_x_forwarded_for and $upstream_addr
# back-to-back with no separator (sample: "200 -11.20.1.30- ups_resp_time: ...").
# A mandatory space between the last two therefore never matches that sample and
# the event ends up tagged _grokparsefailure. "\s*" is used there instead so the
# pattern matches the format as written and still accepts a line that has a space.
# NOTE(review): values written without separators are inherently ambiguous when
# more than one of them is non-empty (e.g. "200" followed directly by an IP);
# adding spaces in the nginx log_format is the more robust long-term fix.
filter {
  grok {
    match => {
      "message" => "%{DATA:remote_user} \[%{HTTPDATE:log_times}\]\s+(?:%{IPV4:http_x_Forwarded_for}|-)\s+(?:%{IPV4:remote_addr}|-)\s+%{WORD:request_method}\s+%{URIPATH:uri}(?:%{URIPARAM:params}|) HTTP/%{NUMBER:http_version} %{NUMBER:response_code} (?:%{NUMBER:upstream_status}|-)(?:%{IPV4:http_x_forwarded_for}|-)\s*(?:%{HOSTPORT:upstream_addr}|-) ups_resp_time: (?:%{NUMBER:ups_resp_time}|-) request_time: (?:%{NUMBER:request_time}|-)"
    }
  }
}
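# (?:%{URI:referrer}|-) 这类写法表示:字段有值时按 %{URI} 匹配并存入 referrer;当 $http_referer 为空时 nginx 会记录为 "-",此时由字面量 "-" 匹配。经测试,如果直接写成 %{URI:referrer},当 referer 为空时会导致 _grokparsefailure,因此凡是可能为空的字段(例如上面的 upstream_status、upstream_addr、ups_resp_time)都需要注意采用这种写法。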
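匹配后数据
(注:按 (?:%{X:字段}|-) 的写法,取值为 "-" 时是由字面量 "-" 匹配的,不会生成对应字段;下面输出中 upstream_status、upstream_addr、ups_resp_time 显示为 "-",可能来自另一版本的匹配规则,请以实际输出为准。)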
{
"http_x_Forwarded_for" => "11.20.1.30",
"host" => "elk3",
"message" => "- [09/May/2023:15:01:31 +0800] 11.20.1.30 38.34.246.127 GET / HTTP/1.1 200 -11.20.1.30- ups_resp_time: - request_time: 0.000",
"request_method" => "GET",
"upstream_status" => "-",
"ups_resp_time" => "-",
"request_time" => "0.000",
"remote_user" => "-",
"log_times" => "09/May/2023:15:01:31 +0800",
"upstream_addr" => "-",
"@version" => "1",
"@timestamp" => 2023-05-09T08:12:35.912Z,
"http_version" => "1.1",
"remote_addr" => "38.34.246.127",
"http_x_forwarded_for" => "11.20.1.30",
"uri" => "/",
"response_code" => "200"
}
grok使用格式
%{SYNTAX:SEMANTIC}
%{预定义好的表达式的名字:自定义命名}
内置正则
1 USERNAME [a-zA-Z0-9._-]+
2 USER %{USERNAME}
3 EMAILLOCALPART [a-zA-Z][a-zA-Z0-9_.+-=:]+
4 EMAILADDRESS %{EMAILLOCALPART}@%{HOSTNAME}
5 INT (?:[+-]?(?:[0-9]+))
6 BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+)))
7 NUMBER (?:%{BASE10NUM})
8 BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+))
9 BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b
10
11 POSINT \b(?:[1-9][0-9]*)\b
12 NONNEGINT \b(?:[0-9]+)\b
13 WORD \b\w+\b
14 NOTSPACE \S+
15 SPACE \s*
16 DATA .*?
17 GREEDYDATA .*
18 QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``))
19 UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}
20 # URN, allowing use of RFC 2141 section 2.3 reserved characters
21 URN urn:[0-9A-Za-z][0-9A-Za-z-]{0,31}:(?:%[0-9a-fA-F]{2}|[0-9A-Za-z()+,.:=@;$_!*'/?#-])+
22
23 # Networking
24 MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})
25 CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})
26 WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})
27 COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})
28 IPV6 ((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?
29 IPV4 (?<![0-9])(?:(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]))(?![0-9])
30 IP (?:%{IPV6}|%{IPV4})
31 HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b)
32 IPORHOST (?:%{IP}|%{HOSTNAME})
33 HOSTPORT %{IPORHOST}:%{POSINT}
34
35 # paths
36 PATH (?:%{UNIXPATH}|%{WINPATH})
37 UNIXPATH (/([\w_%!$@:.,+~-]+|\\.)*)+
38 TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+))
39 WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+
40 URIPROTO [A-Za-z]([A-Za-z0-9+\-.]+)+
41 URIHOST %{IPORHOST}(?::%{POSINT:port})?
42 # uripath comes loosely from RFC1738, but mostly from what Firefox
43 # doesn't turn into %XX
44 URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%&_\-]*)+
45 #URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)?
46 URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]*
47 URIPATHPARAM %{URIPATH}(?:%{URIPARAM})?
48 URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?
49
50 # Months: January, Feb, 3, 03, 12, December
51 MONTH \b(?:[Jj]an(?:uary|uar)?|[Ff]eb(?:ruary|ruar)?|[Mm](?:a|ä)?r(?:ch|z)?|[Aa]pr(?:il)?|[Mm]a(?:y|i)?|[Jj]un(?:e|i)?|[Jj]ul(?:y)?|[Aa]ug(?:ust)?|[Ss]ep(?:tember)?|[Oo](?:c|k)?t(?:ober)?|[Nn]ov(?:ember)?|[Dd]e(?:c|z)(?:ember)?)\b
52 MONTHNUM (?:0?[1-9]|1[0-2])
53 MONTHNUM2 (?:0[1-9]|1[0-2])
54 MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])
55
56 # Days: Monday, Tue, Thu, etc...
57 DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)
58
59 # Years?
60 YEAR (?>\d\d){1,2}
61 HOUR (?:2[0123]|[01]?[0-9])
62 MINUTE (?:[0-5][0-9])
63 # '60' is a leap second in most time standards and thus is valid.
64 SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?)
65 TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
66 # datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it)
67 DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}
68 DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR}
69 ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE}))
70 ISO8601_SECOND (?:%{SECOND}|60)
71 TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
72 DATE %{DATE_US}|%{DATE_EU}
73 DATESTAMP %{DATE}[- ]%{TIME}
74 TZ (?:[APMCE][SD]T|UTC)
75 DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
76 DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}
77 DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}
78 DATESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND}
79
80 # Syslog Dates: Month Day HH:MM:SS
81 SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME}
82 PROG [\x21-\x5a\x5c\x5e-\x7e]+
83 SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])?
84 SYSLOGHOST %{IPORHOST}
85 SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}>
86 HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}
87
88 # Shortcuts
89 QS %{QUOTEDSTRING}
90
91 # Log formats
92 SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:
93
94 # Log Levels
95 LOGLEVEL ([Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?)