Envoy-两种健康检测方式HTTP、TCP & 异常探测【六】

 

 

1.Readme

Outlier Detection Demo
环境说明
五个Service:

envoy:Front Proxy,地址为172.31.18.2
webserver01:第一个后端服务
webserver01-sidecar:第一个后端服务的Sidecar Proxy,地址为172.31.18.11
webserver02:第二个后端服务
webserver02-sidecar:第二个后端服务的Sidecar Proxy,地址为172.31.18.12
#运行和测试
#创建
docker-compose up
#测试
# 持续请求服务上的特定路径/livez
while true; do curl 172.31.18.2; sleep 1; done

# 等服务调度就绪后,另启一个终端,修改其中任何一个服务的/livez响应为非"OK"值,例如,修改第一个后端端点;
curl -X POST -d 'livez=FAIL' http://172.31.18.11/livez

# 通过请求的响应结果即可观测服务调度及响应的记录

# 请求中,可以看出第一个端点因主动健康状态检测失败,因而会被自动移出集群,直到其再次转为健康为止;
# 我们可使用类似如下命令修改为正常响应结果;
curl -X POST -d 'livez=OK' http://172.31.18.11/livez
停止后清理
docker-compose down
[root@xksmaster1 health-check]# cat docker-compose.yaml
# Author: MageEdu <mage@magedu.com>
# Version: v1.0.1
# Site: www.magedu.com
#
version: '3.3'

services:
  # Front proxy: entry point for all client traffic, reachable at 172.31.18.2.
  envoy:
    #image: envoyproxy/envoy-alpine:v1.21-latest
    image: envoyproxy/envoy:v1.23-latest
    environment:
      # Run the Envoy process as root inside the container.
      - ENVOY_UID=0
      - ENVOY_GID=0
    volumes:
    # HTTP-health-check config is active; swap in the commented mount below
    # to switch to the TCP-health-check variant.
    - ./front-envoy.yaml:/etc/envoy/envoy.yaml
    # - ./front-envoy-with-tcp-check.yaml:/etc/envoy/envoy.yaml
    networks:
      envoymesh:
        ipv4_address: 172.31.18.2
        aliases:
        - front-proxy
    depends_on:
    - webserver01-sidecar
    - webserver02-sidecar

  # Sidecar proxy for the first backend. Both sidecars share the network
  # alias "myservice", which the front proxy's STRICT_DNS cluster resolves
  # to discover its endpoints.
  webserver01-sidecar:
    image: envoyproxy/envoy:v1.23-latest
    environment:
      - ENVOY_UID=0
      - ENVOY_GID=0
    volumes:
    - ./envoy-sidecar-proxy.yaml:/etc/envoy/envoy.yaml
    hostname: red
    networks:
      envoymesh:
        ipv4_address: 172.31.18.11
        aliases:
        - myservice

  # First backend app: joins its sidecar's network namespace and binds
  # 127.0.0.1:8080, so it is reachable only through the sidecar.
  webserver01:
    image: ikubernetes/demoapp:v1.0
    environment:
      - PORT=8080
      - HOST=127.0.0.1
    network_mode: "service:webserver01-sidecar"
    depends_on:
    - webserver01-sidecar

  # Sidecar proxy for the second backend (same "myservice" alias).
  webserver02-sidecar:
    image: envoyproxy/envoy:v1.23-latest
    environment:
      - ENVOY_UID=0
      - ENVOY_GID=0
    volumes:
    - ./envoy-sidecar-proxy.yaml:/etc/envoy/envoy.yaml
    hostname: blue
    networks:
      envoymesh:
        ipv4_address: 172.31.18.12
        aliases:
        - myservice

  # Second backend app, likewise confined to its sidecar's netns.
  webserver02:
    image: ikubernetes/demoapp:v1.0
    environment:
      - PORT=8080
      - HOST=127.0.0.1
    network_mode: "service:webserver02-sidecar"
    depends_on:
    - webserver02-sidecar

networks:
  envoymesh:
    driver: bridge
    ipam:
      config:
        - subnet: 172.31.18.0/24

3.front-envoy.yaml(HTTP)

front envoy 监听0.0.0.0:80 即172.31.18.2:80

cluster对myservice的80做健康检查

超时5秒,间隔10秒对/livez监控,2次失败认为失败,2次成功认为恢复。如果返回值是200-399,则认为后端服务器正常,否则健康检查失败

myservice解析到webserver01-sidecar和webserver02-sidecar,请求先转给sidecar的80端口,再由sidecar转发给webserver的8080端口
#      http_health_check:
# Front-proxy config: listens on 0.0.0.0:80, round-robins requests to the
# endpoints resolved from "myservice", and actively health-checks each
# endpoint with HTTP GET /livez.
admin:
  profile_path: /tmp/envoy.prof
  access_log_path: /tmp/admin_access.log
  address:
    socket_address: { address: 0.0.0.0, port_value: 9901 }

static_resources:
  listeners:
  - name: listener_0
    address:
      socket_address: { address: 0.0.0.0, port_value: 80 }
    filter_chains:
    - filters:
      - name: envoy.filters.network.http_connection_manager
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
          stat_prefix: ingress_http
          codec_type: AUTO
          route_config:
            name: local_route
            virtual_hosts:
            - name: webservice
              domains: ["*"]
              routes:
              - match: { prefix: "/" }
                route: { cluster: web_cluster_01 }
          http_filters:
          - name: envoy.filters.http.router
            typed_config:
              "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

  clusters:
  - name: web_cluster_01
    connect_timeout: 0.25s
    # STRICT_DNS: every A record behind "myservice" becomes a cluster endpoint.
    type: STRICT_DNS
    lb_policy: ROUND_ROBIN
    load_assignment:
      cluster_name: web_cluster_01
      endpoints:
      - lb_endpoints:
        - endpoint:
            address:
              socket_address: { address: myservice, port_value: 80 }
    health_checks:
    - timeout: 5s
      interval: 10s
      unhealthy_threshold: 2   # 2 consecutive failures -> mark endpoint unhealthy
      healthy_threshold: 2     # 2 consecutive successes -> mark endpoint healthy again
      http_health_check:
        path: /livez
        # expected_statuses is a repeated Int64Range, so it must be a YAML
        # list; each range's "end" is exclusive, hence [200, 400) accepts
        # status codes 200-399 as the prose above describes.
        expected_statuses:
        - start: 200
          end: 400

====================front-envoy-with-tcp-check.yaml=========================
#      tcp_health_check: {}
# Front-proxy variant using a plain TCP health check instead of HTTP:
# an endpoint is considered healthy while TCP connections to port 80 succeed.
admin:
  profile_path: /tmp/envoy.prof
  access_log_path: /tmp/admin_access.log
  address:
    socket_address: { address: 0.0.0.0, port_value: 9901 }

static_resources:
  listeners:
  - name: listener_0
    address:
      socket_address: { address: 0.0.0.0, port_value: 80 }
    filter_chains:
    - filters:
      - name: envoy.filters.network.http_connection_manager
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
          stat_prefix: ingress_http
          codec_type: AUTO
          route_config:
            name: local_route
            virtual_hosts:
            - name: webservice
              domains: ["*"]
              routes:
              - match: { prefix: "/" }
                route: { cluster: web_cluster_01 }
          http_filters:
          - name: envoy.filters.http.router
            typed_config:
              "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

  clusters:
  - name: web_cluster_01
    connect_timeout: 0.25s
    type: STRICT_DNS
    lb_policy: ROUND_ROBIN
    load_assignment:
      cluster_name: web_cluster_01
      endpoints:
      - lb_endpoints:
        - endpoint:
            address:
              socket_address: { address: myservice, port_value: 80 }
    health_checks:
    - timeout: 5s
      interval: 10s
      unhealthy_threshold: 2
      healthy_threshold: 2
      # Empty payload: a successful TCP connect alone counts as healthy.
      tcp_health_check: {}
# Sidecar proxy config: accepts traffic on 0.0.0.0:80 and forwards it to
# the co-located application listening on 127.0.0.1:8080 in the shared
# network namespace.
admin:
  profile_path: /tmp/envoy.prof
  access_log_path: /tmp/admin_access.log
  address:
    socket_address:
       address: 0.0.0.0
       port_value: 9901

static_resources:
  listeners:
  - name: listener_0
    address:
      socket_address: { address: 0.0.0.0, port_value: 80 }
    filter_chains:
    - filters:
      - name: envoy.filters.network.http_connection_manager
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
          stat_prefix: ingress_http
          codec_type: AUTO
          route_config:
            name: local_route
            virtual_hosts:
            - name: local_service 
              domains: ["*"]
              routes:
              - match: { prefix: "/" }
                route: { cluster: local_cluster }
          http_filters:
          - name: envoy.filters.http.router
            typed_config:
              "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

  clusters:
  - name: local_cluster
    connect_timeout: 0.25s
    # STATIC: the single endpoint is the local app on loopback.
    type: STATIC
    lb_policy: ROUND_ROBIN
    load_assignment:
      cluster_name: local_cluster
      endpoints:
      - lb_endpoints:
        - endpoint:
            address:
              socket_address: { address: 127.0.0.1, port_value: 8080 }

5.运行测试

[root@xksmaster1 health-check]# curl 172.31.18.2:9901/listeners
listener_0::0.0.0.0:80
[root@xksmaster1 health-check]# curl 172.31.18.2:9901/clusters
web_cluster_01::observability_name::web_cluster_01
web_cluster_01::default_priority::max_connections::1024
web_cluster_01::default_priority::max_pending_requests::1024
web_cluster_01::default_priority::max_requests::1024
web_cluster_01::default_priority::max_retries::3
web_cluster_01::high_priority::max_connections::1024
web_cluster_01::high_priority::max_pending_requests::1024
web_cluster_01::high_priority::max_requests::1024
web_cluster_01::high_priority::max_retries::3
web_cluster_01::added_via_api::false
web_cluster_01::172.31.18.12:80::cx_active::2
web_cluster_01::172.31.18.12:80::cx_connect_fail::0
web_cluster_01::172.31.18.12:80::cx_total::2
web_cluster_01::172.31.18.12:80::rq_active::0
web_cluster_01::172.31.18.12:80::rq_error::0
web_cluster_01::172.31.18.12:80::rq_success::13
web_cluster_01::172.31.18.12:80::rq_timeout::0
web_cluster_01::172.31.18.12:80::rq_total::13
web_cluster_01::172.31.18.12:80::hostname::myservice
web_cluster_01::172.31.18.12:80::health_flags::healthy
web_cluster_01::172.31.18.12:80::weight::1
web_cluster_01::172.31.18.12:80::region::
web_cluster_01::172.31.18.12:80::zone::
web_cluster_01::172.31.18.12:80::sub_zone::
web_cluster_01::172.31.18.12:80::canary::false
web_cluster_01::172.31.18.12:80::priority::0
web_cluster_01::172.31.18.12:80::success_rate::-1
web_cluster_01::172.31.18.12:80::local_origin_success_rate::-1
web_cluster_01::172.31.18.11:80::cx_active::0
web_cluster_01::172.31.18.11:80::cx_connect_fail::0
web_cluster_01::172.31.18.11:80::cx_total::0
web_cluster_01::172.31.18.11:80::rq_active::0
web_cluster_01::172.31.18.11:80::rq_error::0
web_cluster_01::172.31.18.11:80::rq_success::0
web_cluster_01::172.31.18.11:80::rq_timeout::0
web_cluster_01::172.31.18.11:80::rq_total::0
web_cluster_01::172.31.18.11:80::hostname::myservice
web_cluster_01::172.31.18.11:80::health_flags::/failed_active_hc
web_cluster_01::172.31.18.11:80::weight::1
web_cluster_01::172.31.18.11:80::region::
web_cluster_01::172.31.18.11:80::zone::
web_cluster_01::172.31.18.11:80::sub_zone::
web_cluster_01::172.31.18.11:80::canary::false
web_cluster_01::172.31.18.11:80::priority::0
web_cluster_01::172.31.18.11:80::success_rate::-1
web_cluster_01::172.31.18.11:80::local_origin_success_rate::-1
[root@xksmaster1 health-check]# curl -I 172.31.18.11
HTTP/1.1 200 OK
content-type: text/html; charset=utf-8
content-length: 90
server: envoy
date: Fri, 12 May 2023 06:35:37 GMT
x-envoy-upstream-service-time: 1

You have mail in /var/spool/mail/root
[root@xksmaster1 health-check]# curl -I 172.31.18.12
HTTP/1.1 200 OK
content-type: text/html; charset=utf-8
content-length: 91
server: envoy
date: Fri, 12 May 2023 06:35:40 GMT
x-envoy-upstream-service-time: 1

[root@xksmaster1 health-check]# curl 172.31.18.2
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
[root@xksmaster1 health-check]# curl 172.31.18.2
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
[root@xksmaster1 health-check]# curl 172.31.18.2
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: red, ServerIP: 172.31.18.11!
[root@xksmaster1 health-check]# curl 172.31.18.2
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
[root@xksmaster1 health-check]# curl 172.31.18.2
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: red, ServerIP: 172.31.18.11!

6.测试某个ep服务异常

将172.31.18.11的/livez响应设置成OK以外的值后,该端点的/livez会返回506,主动健康检查随之失败
# curl 172.31.18.11/livez
OK# curl -XPOST -d "livez=FAIL" 172.31.18.11/livez
# curl 172.31.18.11/livez
FAIL# 

webserver01_1          | 127.0.0.1 - - [28/Sep/2022 04:54:27] "GET /livez HTTP/1.1" 506 -
webserver02_1          | 127.0.0.1 - - [28/Sep/2022 04:54:32] "GET /livez HTTP/1.1" 200 -
webserver01_1          | 127.0.0.1 - - [28/Sep/2022 04:54:37] "GET /livez HTTP/1.1" 506 -
webserver02_1          | 127.0.0.1 - - [28/Sep/2022 04:54:42] "GET /livez HTTP/1.1" 200 -
webserver01_1          | 127.0.0.1 - - [28/Sep/2022 04:54:47] "GET /livez HTTP/1.1" 506 -
webserver02_1          | 127.0.0.1 - - [28/Sep/2022 04:54:52] "GET /livez HTTP/1.1" 200 -
webserver01_1          | 127.0.0.1 - - [28/Sep/2022 04:54:57] "GET /livez HTTP/1.1" 506 -
webserver02_1          | 127.0.0.1 - - [28/Sep/2022 04:55:02] "GET /livez HTTP/1.1" 200 -
webserver01_1          | 127.0.0.1 - - [28/Sep/2022 04:55:07] "GET /livez HTTP/1.1" 506 -

#此时只会显示12节点 11已经剔除
[root@xksmaster1 health-check]# while true; do curl 172.31.18.2; sleep 1; done
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
恢复172.31.18.11 livez=OK

health_checks:
    - timeout: 5s
      interval: 10s
      unhealthy_threshold: 2
      healthy_threshold: 2

每10s 检查一次 连续2次正常 恢复节点 172.31.18.11
# curl -XPOST -d "livez=OK" 172.31.18.11/livez
webserver01_1          | 127.0.0.1 - - [28/Sep/2022 05:06:57] "GET /livez HTTP/1.1" 200 -
webserver02_1          | 127.0.0.1 - - [28/Sep/2022 05:07:02] "GET /livez HTTP/1.1" 200 -
webserver01_1          | 127.0.0.1 - - [28/Sep/2022 05:07:07] "GET /livez HTTP/1.1" 200 -
webserver02_1          | 127.0.0.1 - - [28/Sep/2022 05:07:12] "GET /livez HTTP/1.1" 200 -
root@k8s-node-1:~# while true;do curl 172.31.18.2;sleep 1;done
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: red, ServerIP: 172.31.18.11!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: red, ServerIP: 172.31.18.11!
iKubernetes demoapp v1.0 !! ClientIP: 127.0.0.1, ServerName: blue, ServerIP: 172.31.18.12!

异常探测

    outlier_detection:
      consecutive_5xx: 3
      base_ejection_time: 10s
      max_ejection_percent: 10

#README.md
#[root@k8s-master01 outlier-detection]# cat README.md
# Outlier Detection Demo

# 环境说明
七个Service:
- envoy:Front Proxy,地址为172.31.20.2
- webserver01:第一个后端服务
- webserver01-sidecar:第一个后端服务的Sidecar Proxy,地址为172.31.20.11
- webserver02:第二个后端服务
- webserver02-sidecar:第二个后端服务的Sidecar Proxy,地址为172.31.20.12
- webserver03:第三个后端服务
- webserver03-sidecar:第三个后端服务的Sidecar Proxy,地址为172.31.20.13

### 运行和测试
1. 创建
```
docker-compose up
```

2. 测试
```
# 持续请求服务上的特定路径/livez
while true; do curl 172.31.20.2/livez && echo; sleep 1; done

# 等服务调度就绪后,另启一个终端,修改其中任何一个服务的/livez响应为非"OK"值,例如,修改第一个后端端点;
curl -X POST -d 'livez=FAIL' http://172.31.20.11/livez

# 而后回到docker-compose命令的控制台上,或者直接通过请求的响应结果 ,即可观测服务调度及响应的记录

# 请求中,可以看出第一个端点因响应5xx的响应码,每次被加回之后,会再次弹出,除非使用类似如下命令修改为正常响应结果;
curl -X POST -d 'livez=OK' http://172.31.20.11/livez
```

3. 停止后清理
```
docker-compose down
```

[root@k8s-master01 Cluster-Manager]# cd outlier-detection/
[root@k8s-master01 outlier-detection]# ll
total 16
-rw-r--r-- 1 root root 2134 Feb 20 11:25 docker-compose.yaml
-rw-r--r-- 1 root root 1301 Aug  5  2022 envoy-sidecar-proxy.yaml
-rw-r--r-- 1 root root 1322 Aug  5  2022 front-envoy.yaml
-rw-r--r-- 1 root root 1386 Aug  5  2022 README.md

#docker-compose.yaml
[root@k8s-master01 outlier-detection]# cat docker-compose.yaml
# Author: MageEdu <mage@magedu.com>
# Version: v1.0.1
# Site: www.magedu.com
#
version: '3.3'

services:
  # Front proxy: client entry point at 172.31.20.2. Depends on all three
  # sidecars so the "myservice" alias resolves before Envoy starts.
  envoy:
    image: envoyproxy/envoy-alpine:v1.21-latest
    environment:
      # Run the Envoy process as root inside the container.
      - ENVOY_UID=0
      - ENVOY_GID=0
    volumes:
    - ./front-envoy.yaml:/etc/envoy/envoy.yaml
    networks:
      envoymesh:
        ipv4_address: 172.31.20.2
        aliases:
        - front-proxy
    depends_on:
    - webserver01-sidecar
    - webserver02-sidecar
    - webserver03-sidecar

  # Sidecar for backend 1; all three sidecars share the DNS alias
  # "myservice" used by the front proxy's STRICT_DNS cluster.
  webserver01-sidecar:
    image: envoyproxy/envoy-alpine:v1.21-latest
    environment:
      - ENVOY_UID=0
      - ENVOY_GID=0
    volumes:
    - ./envoy-sidecar-proxy.yaml:/etc/envoy/envoy.yaml
    hostname: red
    networks:
      envoymesh:
        ipv4_address: 172.31.20.11
        aliases:
        - myservice

  # Backend 1: shares its sidecar's network namespace and binds loopback,
  # so it is reachable only through the sidecar.
  webserver01:
    image: ikubernetes/demoapp:v1.0
    environment:
      - PORT=8080
      - HOST=127.0.0.1
    network_mode: "service:webserver01-sidecar"
    depends_on:
    - webserver01-sidecar

  # Sidecar for backend 2.
  webserver02-sidecar:
    image: envoyproxy/envoy-alpine:v1.21-latest
    environment:
      - ENVOY_UID=0
      - ENVOY_GID=0
    volumes:
    - ./envoy-sidecar-proxy.yaml:/etc/envoy/envoy.yaml
    hostname: blue
    networks:
      envoymesh:
        ipv4_address: 172.31.20.12
        aliases:
        - myservice

  # Backend 2, confined to its sidecar's netns.
  webserver02:
    image: ikubernetes/demoapp:v1.0
    environment:
      - PORT=8080
      - HOST=127.0.0.1
    network_mode: "service:webserver02-sidecar"
    depends_on:
    - webserver02-sidecar

  # Sidecar for backend 3.
  webserver03-sidecar:
    image: envoyproxy/envoy-alpine:v1.21-latest
    environment:
      - ENVOY_UID=0
      - ENVOY_GID=0
    volumes:
    - ./envoy-sidecar-proxy.yaml:/etc/envoy/envoy.yaml
    hostname: green
    networks:
      envoymesh:
        ipv4_address: 172.31.20.13
        aliases:
        - myservice

  # Backend 3, confined to its sidecar's netns.
  webserver03:
    image: ikubernetes/demoapp:v1.0
    environment:
      - PORT=8080
      - HOST=127.0.0.1
    network_mode: "service:webserver03-sidecar"
    depends_on:
    - webserver03-sidecar

networks:
  envoymesh:
    driver: bridge
    ipam:
      config:
        - subnet: 172.31.20.0/24

#front-envoy.yaml
#    outlier_detection:
#      consecutive_5xx: 3
#      base_ejection_time: 10s
#      max_ejection_percent: 10
[root@k8s-master01 outlier-detection]# cat front-envoy.yaml
# Front-proxy config with passive health checking (outlier detection):
# endpoints are ejected from the load-balancing pool after consecutive
# 5xx responses, instead of being probed actively.
admin:
  profile_path: /tmp/envoy.prof
  access_log_path: /tmp/admin_access.log
  address:
    socket_address: { address: 0.0.0.0, port_value: 9901 }

static_resources:
  listeners:
  - name: listener_0
    address:
      socket_address: { address: 0.0.0.0, port_value: 80 }
    filter_chains:
    - filters:
      - name: envoy.filters.network.http_connection_manager
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
          stat_prefix: ingress_http
          codec_type: AUTO
          route_config:
            name: local_route
            virtual_hosts:
            - name: webservice
              domains: ["*"]
              routes:
              - match: { prefix: "/" }
                route: { cluster: web_cluster_01 }
          http_filters:
          - name: envoy.filters.http.router
            # Explicit typed_config: name-only filter configuration is
            # deprecated in recent Envoy releases; this also matches the
            # other configs in this demo series.
            typed_config:
              "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

  clusters:
  - name: web_cluster_01
    connect_timeout: 0.25s
    type: STRICT_DNS
    lb_policy: ROUND_ROBIN
    load_assignment:
      cluster_name: web_cluster_01
      endpoints:
      - lb_endpoints:
        - endpoint:
            address:
              socket_address: { address: myservice, port_value: 80 }
    # Passive detection: 3 consecutive 5xx responses eject the endpoint for
    # base_ejection_time multiplied by the number of prior ejections.
    # NOTE(review): with 3 hosts, max_ejection_percent 10% computes to less
    # than one host; Envoy still permits ejecting at least one host in that
    # case — confirm against the deployed Envoy version's docs.
    outlier_detection:
      consecutive_5xx: 3
      base_ejection_time: 10s
      max_ejection_percent: 10

#envoy-sidecar-proxy.yaml
[root@k8s-master01 outlier-detection]# cat envoy-sidecar-proxy.yaml
# Author: MageEdu <mage@magedu.com>
# Version: v1.0.1
# Site: www.magedu.com
#
# Sidecar proxy config: accepts traffic on 0.0.0.0:80 and forwards it to
# the co-located application on 127.0.0.1:8080 in the shared netns.
admin:
  profile_path: /tmp/envoy.prof
  access_log_path: /tmp/admin_access.log
  address:
    socket_address:
       address: 0.0.0.0
       port_value: 9901

static_resources:
  listeners:
  - name: listener_0
    address:
      socket_address: { address: 0.0.0.0, port_value: 80 }
    filter_chains:
    - filters:
      - name: envoy.filters.network.http_connection_manager
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
          stat_prefix: ingress_http
          codec_type: AUTO
          route_config:
            name: local_route
            virtual_hosts:
            - name: local_service
              domains: ["*"]
              routes:
              - match: { prefix: "/" }
                route: { cluster: local_cluster }
          http_filters:
          # Name-only router filter (accepted by Envoy v1.21, the image
          # this demo pins; newer releases expect an explicit typed_config).
          - name: envoy.filters.http.router

  clusters:
  - name: local_cluster
    connect_timeout: 0.25s
    # STATIC: the single endpoint is the local app on loopback.
    type: STATIC
    lb_policy: ROUND_ROBIN
    load_assignment:
      cluster_name: local_cluster
      endpoints:
      - lb_endpoints:
        - endpoint:
            address:
              socket_address: { address: 127.0.0.1, port_value: 8080 }

 

posted @ 2023-05-10 13:28  しみずよしだ  阅读(94)  评论(0)    收藏  举报