Envoy-两种健康检测方式HTTP、TCP & 异常探测【六】

1.Readme
Outlier Detection Demo
环境说明
五个Service:
envoy:Front Proxy,地址为172.31.18.2
webserver01:第一个后端服务
webserver01-sidecar:第一个后端服务的Sidecar Proxy,地址为172.31.18.11
webserver02:第二个后端服务
webserver02-sidecar:第二个后端服务的Sidecar Proxy,地址为172.31.18.12
#运行和测试
#创建
docker-compose up
#测试
# 持续请求服务,观察负载均衡调度及健康检查的效果
while true; do curl 172.31.18.2; sleep 1; done
# 等服务调度就绪后,另启一个终端,修改其中任何一个服务的/livez响应为非"OK"值,例如,修改第一个后端端点;
curl -X POST -d 'livez=FAIL' http://172.31.18.11/livez
# 通过请求的响应结果即可观测服务调度及响应的记录
# 请求中,可以看出第一个端点因主动健康状态检测失败,因而会被自动移出集群,直到其再次转为健康为止;
# 我们可使用类似如下命令修改为正常响应结果;
curl -X POST -d 'livez=OK' http://172.31.18.11/livez
停止后清理
docker-compose down
2.docker-compose.yaml
[root@xksmaster1 health-check]# cat docker-compose.yaml
# Author: MageEdu <mage@magedu.com>
# Version: v1.0.1
# Site: www.magedu.com
#
# Demo topology: one front proxy (envoy) plus two backend services, each
# sharing its network namespace with an Envoy sidecar proxy.
version: '3.3'

services:
  envoy:
    #image: envoyproxy/envoy-alpine:v1.21-latest
    image: envoyproxy/envoy:v1.23-latest
    environment:
      - ENVOY_UID=0
      - ENVOY_GID=0
    volumes:
      - ./front-envoy.yaml:/etc/envoy/envoy.yaml
      # Swap in the config below to test TCP health checking instead of HTTP.
      # - ./front-envoy-with-tcp-check.yaml:/etc/envoy/envoy.yaml
    networks:
      envoymesh:
        ipv4_address: 172.31.18.2
        aliases:
          - front-proxy
    depends_on:
      - webserver01-sidecar
      - webserver02-sidecar

  webserver01-sidecar:
    image: envoyproxy/envoy:v1.23-latest
    environment:
      - ENVOY_UID=0
      - ENVOY_GID=0
    volumes:
      - ./envoy-sidecar-proxy.yaml:/etc/envoy/envoy.yaml
    hostname: red
    networks:
      envoymesh:
        ipv4_address: 172.31.18.11
        aliases:
          # Both sidecars share this DNS alias; the front proxy resolves it
          # via STRICT_DNS to discover both endpoints.
          - myservice

  webserver01:
    image: ikubernetes/demoapp:v1.0
    environment:
      - PORT=8080
      # Bind to loopback only; traffic must enter through the sidecar.
      - HOST=127.0.0.1
    # Join the sidecar's network namespace (same IP, same loopback).
    network_mode: "service:webserver01-sidecar"
    depends_on:
      - webserver01-sidecar

  webserver02-sidecar:
    image: envoyproxy/envoy:v1.23-latest
    environment:
      - ENVOY_UID=0
      - ENVOY_GID=0
    volumes:
      - ./envoy-sidecar-proxy.yaml:/etc/envoy/envoy.yaml
    hostname: blue
    networks:
      envoymesh:
        ipv4_address: 172.31.18.12
        aliases:
          - myservice

  webserver02:
    image: ikubernetes/demoapp:v1.0
    environment:
      - PORT=8080
      - HOST=127.0.0.1
    network_mode: "service:webserver02-sidecar"
    depends_on:
      - webserver02-sidecar

networks:
  envoymesh:
    driver: bridge
    ipam:
      config:
        - subnet: 172.31.18.0/24
3.front-envoy.yaml(HTTP)
front envoy 监听0.0.0.0:80 即172.31.18.2:80
cluster对myservice的80做健康检查
检查超时5秒,每隔10秒对/livez发起一次探测,连续2次失败判定端点不健康,连续2次成功判定恢复;后端端点返回200-399范围内的状态码视为健康,否则健康检查失败
myservice是webserver01-sidecar和webserver02-sidecar,请求转给sidecar80后再由sidecar转给webserver:8080
# http_health_check:
admin:
  profile_path: /tmp/envoy.prof
  access_log_path: /tmp/admin_access.log
  address:
    socket_address: { address: 0.0.0.0, port_value: 9901 }

static_resources:
  listeners:
  - name: listener_0
    address:
      socket_address: { address: 0.0.0.0, port_value: 80 }
    filter_chains:
    - filters:
      - name: envoy.filters.network.http_connection_manager
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
          stat_prefix: ingress_http
          codec_type: AUTO
          route_config:
            name: local_route
            virtual_hosts:
            - name: webservice
              domains: ["*"]
              routes:
              - match: { prefix: "/" }
                route: { cluster: web_cluster_01 }
          http_filters:
          - name: envoy.filters.http.router
            typed_config:
              "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

  clusters:
  - name: web_cluster_01
    connect_timeout: 0.25s
    # STRICT_DNS resolves the "myservice" alias to both sidecar endpoints.
    type: STRICT_DNS
    lb_policy: ROUND_ROBIN
    load_assignment:
      cluster_name: web_cluster_01
      endpoints:
      - lb_endpoints:
        - endpoint:
            address:
              socket_address: { address: myservice, port_value: 80 }
    # Active HTTP health checking: probe /livez every 10s; 2 consecutive
    # failures eject an endpoint, 2 consecutive successes restore it.
    health_checks:
    - timeout: 5s
      interval: 10s
      unhealthy_threshold: 2
      healthy_threshold: 2
      http_health_check:
        path: /livez
        # expected_statuses is a list of Int64Range; each range is
        # half-open [start, end), so 200..399 requires end: 400.
        expected_statuses:
        - start: 200
          end: 400
====================front-envoy-with-tcp-check.yaml=========================
# tcp_health_check: {}
admin:
  profile_path: /tmp/envoy.prof
  access_log_path: /tmp/admin_access.log
  address:
    socket_address: { address: 0.0.0.0, port_value: 9901 }

static_resources:
  listeners:
  - name: listener_0
    address:
      socket_address: { address: 0.0.0.0, port_value: 80 }
    filter_chains:
    - filters:
      - name: envoy.filters.network.http_connection_manager
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
          stat_prefix: ingress_http
          codec_type: AUTO
          route_config:
            name: local_route
            virtual_hosts:
            - name: webservice
              domains: ["*"]
              routes:
              - match: { prefix: "/" }
                route: { cluster: web_cluster_01 }
          http_filters:
          - name: envoy.filters.http.router
            typed_config:
              "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

  clusters:
  - name: web_cluster_01
    connect_timeout: 0.25s
    type: STRICT_DNS
    lb_policy: ROUND_ROBIN
    load_assignment:
      cluster_name: web_cluster_01
      endpoints:
      - lb_endpoints:
        - endpoint:
            address:
              socket_address: { address: myservice, port_value: 80 }
    health_checks:
    - timeout: 5s
      interval: 10s
      unhealthy_threshold: 2
      healthy_threshold: 2
      # Plain TCP connect check: an endpoint is healthy as long as a
      # connection to port 80 succeeds (empty payload config).
      tcp_health_check: {}
4.envoy-sidecar-proxy.yaml
admin:
  profile_path: /tmp/envoy.prof
  access_log_path: /tmp/admin_access.log
  address:
    socket_address:
      address: 0.0.0.0
      port_value: 9901

static_resources:
  listeners:
  - name: listener_0
    address:
      socket_address: { address: 0.0.0.0, port_value: 80 }
    filter_chains:
    - filters:
      - name: envoy.filters.network.http_connection_manager
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
          stat_prefix: ingress_http
          codec_type: AUTO
          route_config:
            name: local_route
            virtual_hosts:
            - name: local_service
              domains: ["*"]
              routes:
              - match: { prefix: "/" }
                route: { cluster: local_cluster }
          http_filters:
          - name: envoy.filters.http.router
            typed_config:
              "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

  clusters:
  - name: local_cluster
    connect_timeout: 0.25s
    # STATIC: the app shares this network namespace and listens on loopback.
    type: STATIC
    lb_policy: ROUND_ROBIN
    load_assignment:
      cluster_name: local_cluster
      endpoints:
      - lb_endpoints:
        - endpoint:
            address:
              socket_address: { address: 127.0.0.1, port_value: 8080 }
5.运行测试
[root@xksmaster1 health-check]# curl 172.31.18.2:9901/listeners
listener_0::0.0.0.0:80
[root@xksmaster1 health-check]# curl 172.31.18.2:9901/clusters
web_cluster_01::observability_name::web_cluster_01
web_cluster_01::default_priority::max_connections::1024
web_cluster_01::default_priority::max_pending_requests::1024
web_cluster_01::default_priority::max_requests::1024
web_cluster_01::default_priority::max_retries::3
web_cluster_01::high_priority::max_connections::1024
web_cluster_01::high_priority::max_pending_requests::1024
web_cluster_01::high_priority::max_requests::1024
web_cluster_01::high_priority::max_retries::3
web_cluster_01::added_via_api::false
web_cluster_01::172.31.18.12:80::cx_active::2
web_cluster_01::172.31.18.12:80::cx_connect_fail::0
web_cluster_01::172.31.18.12:80::cx_total::2
web_cluster_01::172.31.18.12:80::rq_active::0
web_cluster_01::172.31.18.12:80::rq_error::0
web_cluster_01::172.31.18.12:80::rq_success::13
web_cluster_01::172.31.18.12:80::rq_timeout::0
web_cluster_01::172.31.18.12:80::rq_total::13
web_cluster_01::172.31.18.12:80::hostname::myservice
web_cluster_01::172.31.18.12:80::health_flags::healthy
web_cluster_01::172.31.18.12:80::weight::1
web_cluster_01::172.31.18.12:80::region::
web_cluster_01::172.31.18.12:80::zone::
web_cluster_01::172.31.18.12:80::sub_zone::
web_cluster_01::172.31.18.12:80::canary::false
web_cluster_01::172.31.18.12:80::priority::0
web_cluster_01::172.31.18.12:80::success_rate::-1
web_cluster_01::172.31.18.12:80::local_origin_success_rate::-1
web_cluster_01::172.31.18.11:80::cx_active::0
web_cluster_01::172.31.18.11:80::cx_connect_fail::0
web_cluster_01::172.31.18.11:80::cx_total::0
web_cluster_01::172.31.18.11:80::rq_active::0
web_cluster_01::172.31.18.11:80::rq_error::0
web_cluster_01::172.31.18.11:80::rq_success::0
web_cluster_01::172.31.18.11:80::rq_timeout::0
web_cluster_01::172.31.18.11:80::rq_total::0
web_cluster_01::172.31.18.11:80::hostname::myservice
web_cluster_01::172.31.18.11:80::health_flags::/failed_active_hc
web_cluster_01::172.31.18.11:80::weight::1
web_cluster_01::172.31.18.11:80::region::
web_cluster_01::172.31.18.11:80::zone::
web_cluster_01::172.31.18.11:80::sub_zone::
web_cluster_01::172.31.18.11:80::canary::false
web_cluster_01::172.31.18.11:80::priority::0
web_cluster_01::172.31.18.11:80::success_rate::-1
web_cluster_01::172.31.18.11:80::local_origin_success_rate::-1
[root@xksmaster1 health-check]# curl -I 172.31.18.11
HTTP/1.1 200 OK
content-type: text/html; charset=utf-8
content-length: 90
server: envoy
date: Fri, 12 May 2023 06:35:37 GMT
x-envoy-upstream-service-time: 1
You have mail in /var/spool/mail/root
[root@xksmaster1 health-check]# curl -I 172.31.18.12
HTTP/1.1 200 OK
content-type: text/html; charset=utf-8
content-length: 91
server: envoy
date: Fri, 12 May 2023 06:35:40 GMT
x-envoy-upstream-service-time: 1
[root@xksmaster1 health-check]# curl 172.31.18.2
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
[root@xksmaster1 health-check]# curl 172.31.18.2
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
[root@xksmaster1 health-check]# curl 172.31.18.2
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: red, ServerIP: 172.31.18.11!
[root@xksmaster1 health-check]# curl 172.31.18.2
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
[root@xksmaster1 health-check]# curl 172.31.18.2
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: red, ServerIP: 172.31.18.11!
6.测试某个ep服务异常
将172.31.18.11的/livez响应值设置成OK以外的状态后,demoapp会对/livez请求返回506状态码,主动健康检查随之失败
# curl 172.31.18.11/livez
OK# curl -XPOST -d "livez=FAIL" 172.31.18.11/livez
# curl 172.31.18.11/livez
FAIL#
webserver01_1 | 127.0.0.1 - - [28/Sep/2022 04:54:27] "GET /livez HTTP/1.1" 506 -
webserver02_1 | 127.0.0.1 - - [28/Sep/2022 04:54:32] "GET /livez HTTP/1.1" 200 -
webserver01_1 | 127.0.0.1 - - [28/Sep/2022 04:54:37] "GET /livez HTTP/1.1" 506 -
webserver02_1 | 127.0.0.1 - - [28/Sep/2022 04:54:42] "GET /livez HTTP/1.1" 200 -
webserver01_1 | 127.0.0.1 - - [28/Sep/2022 04:54:47] "GET /livez HTTP/1.1" 506 -
webserver02_1 | 127.0.0.1 - - [28/Sep/2022 04:54:52] "GET /livez HTTP/1.1" 200 -
webserver01_1 | 127.0.0.1 - - [28/Sep/2022 04:54:57] "GET /livez HTTP/1.1" 506 -
webserver02_1 | 127.0.0.1 - - [28/Sep/2022 04:55:02] "GET /livez HTTP/1.1" 200 -
webserver01_1 | 127.0.0.1 - - [28/Sep/2022 04:55:07] "GET /livez HTTP/1.1" 506 -
#此时只会显示12节点 11已经剔除
[root@xksmaster1 health-check]# while true; do curl 172.31.18.2; sleep 1; done
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
恢复172.31.18.11 livez=OK
# Health-check cadence: probe every 10s; 2 consecutive successes
# bring an ejected endpoint (172.31.18.11) back into rotation.
health_checks:
- timeout: 5s
  interval: 10s
  unhealthy_threshold: 2
  healthy_threshold: 2
每10s 检查一次 连续2次正常 恢复节点 172.31.18.11
# curl -XPOST -d "livez=OK" 172.31.18.11/livez
webserver01_1 | 127.0.0.1 - - [28/Sep/2022 05:06:57] "GET /livez HTTP/1.1" 200 -
webserver02_1 | 127.0.0.1 - - [28/Sep/2022 05:07:02] "GET /livez HTTP/1.1" 200 -
webserver01_1 | 127.0.0.1 - - [28/Sep/2022 05:07:07] "GET /livez HTTP/1.1" 200 -
webserver02_1 | 127.0.0.1 - - [28/Sep/2022 05:07:12] "GET /livez HTTP/1.1" 200 -
root@k8s-node-1:~# while true;do curl 172.31.18.2;sleep 1;done
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: red, ServerIP: 172.31.18.11!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: red, ServerIP: 172.31.18.11!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
异常探测
# Passive (outlier) detection: eject an endpoint after 3 consecutive
# 5xx responses, for a base period of 10s per ejection.
outlier_detection:
  consecutive_5xx: 3
  base_ejection_time: 10s
  max_ejection_percent: 10
#README.md
#[root@k8s-master01 outlier-detection]# cat README.md
# Outlier Detection Demo
# 环境说明
五个Service:
- envoy:Front Proxy,地址为172.31.20.2
- webserver01:第一个后端服务
- webserver01-sidecar:第一个后端服务的Sidecar Proxy,地址为172.31.20.11
- webserver02:第二个后端服务
- webserver02-sidecar:第二个后端服务的Sidecar Proxy,地址为172.31.20.12
- webserver03:第三个后端服务
- webserver03-sidecar:第三个后端服务的Sidecar Proxy,地址为172.31.20.13
### 运行和测试
1. 创建
```
docker-compose up
```
2. 测试
```
# 持续请求服务上的特定路径/livez
while true; do curl 172.31.20.2/livez && echo; sleep 1; done
# 等服务调度就绪后,另启一个终端,修改其中任何一个服务的/livez响应为非"OK"值,例如,修改第一个后端端点;
curl -X POST -d 'livez=FAIL' http://172.31.20.11/livez
# 而后回到docker-compose命令的控制台上,或者直接通过请求的响应结果 ,即可观测服务调度及响应的记录
# 请求中,可以看出第一个端点因响应5xx的响应码,每次被加回之后,会再次弹出,除非使用类似如下命令修改为正常响应结果;
curl -X POST -d 'livez=OK' http://172.31.20.11/livez
```
3. 停止后清理
```
docker-compose down
```
[root@k8s-master01 Cluster-Manager]# cd outlier-detection/
[root@k8s-master01 outlier-detection]# ll
total 16
-rw-r--r-- 1 root root 2134 Feb 20 11:25 docker-compose.yaml
-rw-r--r-- 1 root root 1301 Aug 5 2022 envoy-sidecar-proxy.yaml
-rw-r--r-- 1 root root 1322 Aug 5 2022 front-envoy.yaml
-rw-r--r-- 1 root root 1386 Aug 5 2022 README.md
#docker-compose.yaml
[root@k8s-master01 outlier-detection]# cat docker-compose.yaml
# Author: MageEdu <mage@magedu.com>
# Version: v1.0.1
# Site: www.magedu.com
#
# Outlier-detection demo: one front proxy plus three backend services,
# each sharing its network namespace with an Envoy sidecar proxy.
version: '3.3'

services:
  envoy:
    image: envoyproxy/envoy-alpine:v1.21-latest
    environment:
      - ENVOY_UID=0
      - ENVOY_GID=0
    volumes:
      - ./front-envoy.yaml:/etc/envoy/envoy.yaml
    networks:
      envoymesh:
        ipv4_address: 172.31.20.2
        aliases:
          - front-proxy
    depends_on:
      - webserver01-sidecar
      - webserver02-sidecar
      - webserver03-sidecar

  webserver01-sidecar:
    image: envoyproxy/envoy-alpine:v1.21-latest
    environment:
      - ENVOY_UID=0
      - ENVOY_GID=0
    volumes:
      - ./envoy-sidecar-proxy.yaml:/etc/envoy/envoy.yaml
    hostname: red
    networks:
      envoymesh:
        ipv4_address: 172.31.20.11
        aliases:
          # All three sidecars share this DNS alias for STRICT_DNS discovery.
          - myservice

  webserver01:
    image: ikubernetes/demoapp:v1.0
    environment:
      - PORT=8080
      # Bind to loopback only; traffic must enter through the sidecar.
      - HOST=127.0.0.1
    # Join the sidecar's network namespace (same IP, same loopback).
    network_mode: "service:webserver01-sidecar"
    depends_on:
      - webserver01-sidecar

  webserver02-sidecar:
    image: envoyproxy/envoy-alpine:v1.21-latest
    environment:
      - ENVOY_UID=0
      - ENVOY_GID=0
    volumes:
      - ./envoy-sidecar-proxy.yaml:/etc/envoy/envoy.yaml
    hostname: blue
    networks:
      envoymesh:
        ipv4_address: 172.31.20.12
        aliases:
          - myservice

  webserver02:
    image: ikubernetes/demoapp:v1.0
    environment:
      - PORT=8080
      - HOST=127.0.0.1
    network_mode: "service:webserver02-sidecar"
    depends_on:
      - webserver02-sidecar

  webserver03-sidecar:
    image: envoyproxy/envoy-alpine:v1.21-latest
    environment:
      - ENVOY_UID=0
      - ENVOY_GID=0
    volumes:
      - ./envoy-sidecar-proxy.yaml:/etc/envoy/envoy.yaml
    hostname: green
    networks:
      envoymesh:
        ipv4_address: 172.31.20.13
        aliases:
          - myservice

  webserver03:
    image: ikubernetes/demoapp:v1.0
    environment:
      - PORT=8080
      - HOST=127.0.0.1
    network_mode: "service:webserver03-sidecar"
    depends_on:
      - webserver03-sidecar

networks:
  envoymesh:
    driver: bridge
    ipam:
      config:
        - subnet: 172.31.20.0/24
#front-envoy.yaml
# outlier_detection:
# consecutive_5xx: 3
# base_ejection_time: 10s
# max_ejection_percent: 10
[root@k8s-master01 outlier-detection]# cat front-envoy.yaml
admin:
  profile_path: /tmp/envoy.prof
  access_log_path: /tmp/admin_access.log
  address:
    socket_address: { address: 0.0.0.0, port_value: 9901 }

static_resources:
  listeners:
  - name: listener_0
    address:
      socket_address: { address: 0.0.0.0, port_value: 80 }
    filter_chains:
    - filters:
      - name: envoy.filters.network.http_connection_manager
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
          stat_prefix: ingress_http
          codec_type: AUTO
          route_config:
            name: local_route
            virtual_hosts:
            - name: webservice
              domains: ["*"]
              routes:
              - match: { prefix: "/" }
                route: { cluster: web_cluster_01 }
          http_filters:
          - name: envoy.filters.http.router
            # Explicit typed_config; relying on name-only filter lookup
            # is deprecated in recent Envoy releases.
            typed_config:
              "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

  clusters:
  - name: web_cluster_01
    connect_timeout: 0.25s
    # STRICT_DNS resolves the "myservice" alias to all three sidecars.
    type: STRICT_DNS
    lb_policy: ROUND_ROBIN
    load_assignment:
      cluster_name: web_cluster_01
      endpoints:
      - lb_endpoints:
        - endpoint:
            address:
              socket_address: { address: myservice, port_value: 80 }
    # Passive (outlier) detection only — no active health_checks here:
    # 3 consecutive 5xx responses eject a host for base_ejection_time.
    outlier_detection:
      consecutive_5xx: 3
      base_ejection_time: 10s
      max_ejection_percent: 10
#envoy-sidecar-proxy.yaml
[root@k8s-master01 outlier-detection]# cat envoy-sidecar-proxy.yaml
# Author: MageEdu <mage@magedu.com>
# Version: v1.0.1
# Site: www.magedu.com
#
admin:
  profile_path: /tmp/envoy.prof
  access_log_path: /tmp/admin_access.log
  address:
    socket_address:
      address: 0.0.0.0
      port_value: 9901

static_resources:
  listeners:
  - name: listener_0
    address:
      socket_address: { address: 0.0.0.0, port_value: 80 }
    filter_chains:
    - filters:
      - name: envoy.filters.network.http_connection_manager
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
          stat_prefix: ingress_http
          codec_type: AUTO
          route_config:
            name: local_route
            virtual_hosts:
            - name: local_service
              domains: ["*"]
              routes:
              - match: { prefix: "/" }
                route: { cluster: local_cluster }
          http_filters:
          - name: envoy.filters.http.router
            # Explicit typed_config; relying on name-only filter lookup
            # is deprecated in recent Envoy releases.
            typed_config:
              "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

  clusters:
  - name: local_cluster
    connect_timeout: 0.25s
    # STATIC: the app shares this network namespace and listens on loopback.
    type: STATIC
    lb_policy: ROUND_ROBIN
    load_assignment:
      cluster_name: local_cluster
      endpoints:
      - lb_endpoints:
        - endpoint:
            address:
              socket_address: { address: 127.0.0.1, port_value: 8080 }

浙公网安备 33010602011771号