Kubernetes CRD、Operator、控制器和聚合层详解

# Kubernetes CRD、Operator、控制器和聚合层深度解析与完整代码示例

## 1. Custom Resource Definition (CRD) 完整实现

### 1.1 完整CRD定义示例(带验证和子资源)

```yaml
# CustomResourceDefinition for the namespaced "Database" kind in group example.com.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  # Must be "<plural>.<group>".
  name: databases.example.com
spec:
  group: example.com
  versions:
    - name: v1
      served: true
      storage: true
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              required: [engine, version]
              properties:
                engine:
                  type: string
                  enum: [mysql, postgresql, mongodb]
                version:
                  type: string
                  pattern: '^[0-9]+\.[0-9]+\.[0-9]+$'
                replicas:
                  type: integer
                  minimum: 1
                  maximum: 10
                  # replicas is optional; default it so consumers never see an
                  # absent value and the scale subresource always finds its path.
                  default: 1
            status:
              type: object
              properties:
                phase:
                  type: string
                nodes:
                  type: array
                  items:
                    type: string
                # Declared because scale.statusReplicasPath points here; an
                # undeclared field would be pruned from stored objects.
                replicas:
                  type: integer
      subresources:
        # Serve /status so status is written separately from spec.
        status: {}
        # Serve /scale so generic scaling clients can drive spec.replicas.
        scale:
          specReplicasPath: .spec.replicas
          statusReplicasPath: .status.replicas
  scope: Namespaced
  names:
    plural: databases
    singular: database
    kind: Database
    shortNames: ["db"]
```

### 1.2 自定义资源实例

```yaml
# Example Database object; spec is the desired state, status the observed one.
kind: Database
apiVersion: example.com/v1
metadata:
  name: production-db
spec:
  engine: mysql
  version: "8.0.26"
  replicas: 3
# NOTE(review): status is shown for illustration; it is presumably written by
# the controller rather than supplied by the user - confirm.
status:
  phase: Running
  nodes: ["mysql-0", "mysql-1", "mysql-2"]
```

## 2. Operator 完整实现

### 2.1 完整Operator代码结构(Go语言)

```go
// main.go
package main

import (
"context"
"flag"
"os"
"time"

"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/log/zap"

databasev1 "github.com/example/database-operator/api/v1"
"github.com/example/database-operator/controllers"
)

// Package-level state shared by init and main.
var (
	// scheme is the type registry that init fills and main passes to the manager.
	scheme = runtime.NewScheme()
	// setupLog is the logger for start-up messages, named "setup".
	setupLog = ctrl.Log.WithName("setup")
)

func init() {
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
utilruntime.Must(databasev1.AddToScheme(scheme))
}

// main parses the command-line flags, builds the controller manager, registers
// the Database reconciler with it and then runs the manager until the signal
// handler stops it. Every failure is logged through setupLog and exits with 1.
func main() {
	var (
		metricsAddr          string
		enableLeaderElection bool
	)
	flag.StringVar(&metricsAddr, "metrics-addr", ":8080", "The address the metric endpoint binds to.")
	flag.BoolVar(&enableLeaderElection, "enable-leader-election", false,
		"Enable leader election for controller manager.")
	flag.Parse()

	ctrl.SetLogger(zap.New(zap.UseDevMode(true)))

	// die logs the failure and terminates the process.
	die := func(err error, msg string, keysAndValues ...interface{}) {
		setupLog.Error(err, msg, keysAndValues...)
		os.Exit(1)
	}

	options := ctrl.Options{
		Scheme:             scheme,
		MetricsBindAddress: metricsAddr,
		Port:               9443,
		LeaderElection:     enableLeaderElection,
		LeaderElectionID:   "database-operator.example.com",
	}
	mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), options)
	if err != nil {
		die(err, "unable to start manager")
	}

	reconciler := &controllers.DatabaseReconciler{
		Client:   mgr.GetClient(),
		Log:      ctrl.Log.WithName("controllers").WithName("Database"),
		Scheme:   mgr.GetScheme(),
		Recorder: mgr.GetEventRecorderFor("database-controller"),
	}
	if err := reconciler.SetupWithManager(mgr); err != nil {
		die(err, "unable to create controller", "controller", "Database")
	}

	setupLog.Info("starting manager")
	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
		die(err, "problem running manager")
	}
}
```

### 2.2 控制器实现(Reconciler)

```go
// controllers/database_controller.go
package controllers

import (
"context"
"fmt"
"time"

"github.com/go-logr/logr"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

databasev1 "github.com/example/database-operator/api/v1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// DatabaseReconciler reconciles a Database object
type DatabaseReconciler struct {
	// Client is embedded so Get/List/Create/Update/Status can be called on the reconciler directly.
	client.Client
	// Log is the base logger; Reconcile derives a per-request logger from it.
	Log logr.Logger
	// Scheme is handed to ctrl.SetControllerReference when owner references are set.
	Scheme *runtime.Scheme
	// Recorder is the event recorder supplied by the manager at construction time.
	Recorder record.EventRecorder
}

// The Database permissions use the API group the CRD is registered under (example.com).
// +kubebuilder:rbac:groups=example.com,resources=databases,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=example.com,resources=databases/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch
// +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete

// Reconcile drives one Database towards its desired state:
//
//  1. load the Database named in req (a missing object is not an error);
//  2. create its StatefulSet when none exists;
//  3. align the StatefulSet replica count with spec.replicas;
//  4. record the names of the matching Pods in status.nodes.
//
// It returns a requeue request after creating or resizing the StatefulSet and
// a non-nil error whenever an API call fails.
func (r *DatabaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	log := r.Log.WithValues("database", req.NamespacedName)

	// 1. Fetch the Database instance.
	db := &databasev1.Database{}
	if err := r.Get(ctx, req.NamespacedName, db); err != nil {
		if errors.IsNotFound(err) {
			log.Info("Database resource not found. Ignoring since object must be deleted")
			return ctrl.Result{}, nil
		}
		log.Error(err, "Failed to get Database")
		return ctrl.Result{}, err
	}

	// 2. Create the StatefulSet if it does not exist yet.
	found := &appsv1.StatefulSet{}
	err := r.Get(ctx, client.ObjectKey{Name: db.Name, Namespace: db.Namespace}, found)
	switch {
	case errors.IsNotFound(err):
		sts := r.statefulSetForDatabase(db)
		// Never create a StatefulSet that is not owned by the Database: it
		// would not be garbage-collected when the Database is deleted.
		if !metav1.IsControlledBy(sts, db) {
			err = fmt.Errorf("owner reference to Database %s/%s is missing on StatefulSet %q", db.Namespace, db.Name, sts.Name)
			log.Error(err, "Refusing to create an unowned StatefulSet", "StatefulSet.Namespace", sts.Namespace, "StatefulSet.Name", sts.Name)
			return ctrl.Result{}, err
		}
		log.Info("Creating a new StatefulSet", "StatefulSet.Namespace", sts.Namespace, "StatefulSet.Name", sts.Name)
		if err = r.Create(ctx, sts); err != nil {
			log.Error(err, "Failed to create new StatefulSet", "StatefulSet.Namespace", sts.Namespace, "StatefulSet.Name", sts.Name)
			return ctrl.Result{}, err
		}
		// Created successfully - requeue to continue with the next steps.
		return ctrl.Result{Requeue: true}, nil
	case err != nil:
		log.Error(err, "Failed to get StatefulSet")
		return ctrl.Result{}, err
	}

	// 3. Make the replica count match the spec. Replicas is a pointer, so
	// guard against nil before dereferencing it.
	size := db.Spec.Replicas
	if found.Spec.Replicas == nil || *found.Spec.Replicas != size {
		found.Spec.Replicas = &size
		if err = r.Update(ctx, found); err != nil {
			log.Error(err, "Failed to update StatefulSet", "StatefulSet.Namespace", found.Namespace, "StatefulSet.Name", found.Name)
			return ctrl.Result{}, err
		}
		// Updated successfully - requeue to refresh the status afterwards.
		return ctrl.Result{Requeue: true}, nil
	}

	// 4. Refresh the status from the Pods carrying this Database's labels.
	podList := &corev1.PodList{}
	listOpts := []client.ListOption{
		client.InNamespace(db.Namespace),
		client.MatchingLabels(labelsForDatabase(db.Name)),
	}
	if err = r.List(ctx, podList, listOpts...); err != nil {
		log.Error(err, "Failed to list pods", "Database.Namespace", db.Namespace, "Database.Name", db.Name)
		return ctrl.Result{}, err
	}

	podNames := getPodNames(podList.Items)
	if !reflect.DeepEqual(podNames, db.Status.Nodes) {
		db.Status.Nodes = podNames
		db.Status.Phase = databasev1.DatabasePhaseRunning
		if err := r.Status().Update(ctx, db); err != nil {
			log.Error(err, "Failed to update Database status")
			return ctrl.Result{}, err
		}
	}

	return ctrl.Result{}, nil
}

// statefulSetForDatabase builds (but does not create) the StatefulSet for db.
//
// The StatefulSet shares db's name and namespace, runs one container whose
// image is "<engine>:<version>", uses "<name>-svc" as its governing service
// name, and is marked as controlled by db. A failure to set that controller
// reference is logged; the returned object then carries no owner reference.
func (r *DatabaseReconciler) statefulSetForDatabase(db *databasev1.Database) *appsv1.StatefulSet {
	ls := labelsForDatabase(db.Name)
	replicas := db.Spec.Replicas

	sts := &appsv1.StatefulSet{
		ObjectMeta: metav1.ObjectMeta{
			Name:      db.Name,
			Namespace: db.Namespace,
		},
		Spec: appsv1.StatefulSetSpec{
			Replicas:    &replicas,
			ServiceName: db.Name + "-svc",
			Selector: &metav1.LabelSelector{
				MatchLabels: ls,
			},
			Template: corev1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{
					Labels: ls,
				},
				Spec: corev1.PodSpec{
					Containers: []corev1.Container{{
						Image: db.Spec.Engine + ":" + db.Spec.Version,
						Name:  db.Spec.Engine,
						Ports: getDatabasePorts(db.Spec.Engine),
					}},
				},
			},
		},
	}

	// Make the Database the controlling owner so the StatefulSet is
	// garbage-collected with it. The error used to be dropped silently.
	if err := ctrl.SetControllerReference(db, sts, r.Scheme); err != nil {
		r.Log.Error(err, "Failed to set owner reference on StatefulSet",
			"StatefulSet.Namespace", sts.Namespace, "StatefulSet.Name", sts.Name)
	}
	return sts
}
```

## 3. 内置控制器扩展实现

### 3.1 自定义控制器完整示例

```go
package controllers

import (
"context"
"fmt"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
)

// PodAnnotator reconciles Pod objects
type PodAnnotator struct {
	// Client is embedded so Get/Update can be called on the reconciler directly.
	client.Client
	// Scheme is the runtime scheme supplied at construction time.
	Scheme *runtime.Scheme
	// Recorder emits the "Annotated" event after a Pod has been updated.
	Recorder record.EventRecorder
}

// +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;update;patch

// Reconcile adds the "example.com/injected" and "example.com/timestamp"
// annotations to the Pod named in req, unless the Pod is gone or already
// carries "example.com/injected". On success it records an "Annotated" event.
//
// The change is sent as a merge patch of the annotations rather than a full
// Update, so concurrent writes to other parts of the Pod do not cause
// conflict errors.
func (r *PodAnnotator) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	logger := log.FromContext(ctx)

	// Fetch the Pod; a missing Pod needs no work.
	pod := &corev1.Pod{}
	if err := r.Get(ctx, req.NamespacedName, pod); err != nil {
		if errors.IsNotFound(err) {
			return ctrl.Result{}, nil
		}
		return ctrl.Result{}, err
	}

	// Already annotated: nothing to do. Reading a nil map is safe in Go.
	if _, exists := pod.Annotations["example.com/injected"]; exists {
		return ctrl.Result{}, nil
	}

	// Snapshot the object before mutating it so the patch holds only the diff.
	base := pod.DeepCopy()
	if pod.Annotations == nil {
		pod.Annotations = make(map[string]string)
	}
	pod.Annotations["example.com/injected"] = "true"
	pod.Annotations["example.com/timestamp"] = time.Now().Format(time.RFC3339)

	if err := r.Patch(ctx, pod, client.MergeFrom(base)); err != nil {
		logger.Error(err, "unable to update Pod annotations")
		return ctrl.Result{}, err
	}

	r.Recorder.Event(pod, corev1.EventTypeNormal, "Annotated", "Successfully added custom annotations")
	return ctrl.Result{}, nil
}

// SetupWithManager registers the PodAnnotator with mgr as a controller for
// Pods, filtering events before they reach Reconcile:
//
//   - create events pass only for Pods in the target namespace;
//   - update events pass only for Pods in the target namespace whose
//     annotations changed.
//
// The namespace check on updates keeps Pods of other namespaces from being
// annotated; previously only create events were restricted.
func (r *PodAnnotator) SetupWithManager(mgr ctrl.Manager) error {
	// targetNamespace is the only namespace whose Pods are handled.
	const targetNamespace = "target-namespace"

	return ctrl.NewControllerManagedBy(mgr).
		For(&corev1.Pod{}).
		WithEventFilter(predicate.Funcs{
			CreateFunc: func(e event.CreateEvent) bool {
				return e.Object.GetNamespace() == targetNamespace
			},
			UpdateFunc: func(e event.UpdateEvent) bool {
				if e.ObjectNew.GetNamespace() != targetNamespace {
					return false
				}
				oldAnnotations := e.ObjectOld.GetAnnotations()
				newAnnotations := e.ObjectNew.GetAnnotations()
				return !reflect.DeepEqual(oldAnnotations, newAnnotations)
			},
		}).
		Complete(r)
}
```

## 4. 聚合API完整实现

### 4.1 聚合API服务实现

```go
package main

import (
"context"
"encoding/json"
"fmt"
"net/http"
"os"
"time"

"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/runtime/serializer"
genericapiserver "k8s.io/apiserver/pkg/server"
"k8s.io/client-go/rest"
"sigs.k8s.io/apiserver-runtime/pkg/builder"
)

// API identity and scheme registration helpers for the custom resource.
var (
	// GroupVersion is the API group/version the types are registered under.
	GroupVersion = schema.GroupVersion{Group: "example.com", Version: "v1"}

	// SchemeBuilder collects the functions that add types to a runtime.Scheme.
	SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes)

	// AddToScheme adds the API types to a scheme.
	AddToScheme = SchemeBuilder.AddToScheme
)

// addKnownTypes registers Database and DatabaseList under GroupVersion and
// adds the shared meta/v1 types for that group/version. It always returns nil.
func addKnownTypes(scheme *runtime.Scheme) error {
	scheme.AddKnownTypes(GroupVersion, &Database{}, &DatabaseList{})
	metav1.AddToGroupVersion(scheme, GroupVersion)

	return nil
}

// Database is the custom resource type: type and object metadata plus the
// desired state (Spec) and the observed state (Status).
type Database struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	// Spec is the desired state.
	Spec DatabaseSpec `json:"spec,omitempty"`
	// Status is the observed state.
	Status DatabaseStatus `json:"status,omitempty"`
}

// DatabaseSpec is the desired state of a Database.
type DatabaseSpec struct {
	// Engine is serialized as "engine".
	Engine string `json:"engine"`
	// Version is serialized as "version".
	Version string `json:"version"`
	// Replicas is serialized as "replicas".
	Replicas int32 `json:"replicas"`
}

// DatabaseStatus is the observed state of a Database.
type DatabaseStatus struct {
	// Phase is serialized as "phase".
	Phase string `json:"phase"`
	// Nodes is serialized as "nodes".
	Nodes []string `json:"nodes"`
}

// DatabaseList is a collection of Database objects.
type DatabaseList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`

	// Items holds the Databases in the list.
	Items []Database `json:"items"`
}

// main builds and runs the aggregated API server:
//
//  1. create a scheme and register the API types;
//  2. create the codec factory;
//  3. create the recommended server configuration;
//  4. build the server for the Database resource;
//  5. mount the extra "/custom" JSON endpoint;
//  6. run until the signal handler stops it.
//
// Any set-up or run failure is written to stderr and exits with status 1.
func main() {
	// 1. Create the scheme. A registration failure must not be ignored: the
	// server could not encode or decode its own types.
	scheme := runtime.NewScheme()
	metav1.AddToGroupVersion(scheme, schema.GroupVersion{Version: "v1"})
	if err := AddToScheme(scheme); err != nil {
		fmt.Fprintf(os.Stderr, "error registering API types: %v\n", err)
		os.Exit(1)
	}

	// 2. Create the codecs.
	codecs := serializer.NewCodecFactory(scheme)

	// 3. Configure the API server.
	recommendedConfig := genericapiserver.NewRecommendedConfig(codecs)
	recommendedConfig.ExternalAddress = "0.0.0.0:8443"

	// 4. Build the API server.
	server, err := builder.APIServer.
		WithResource(&Database{}).
		WithConfig(recommendedConfig).
		WithLocalDebugExtension().
		Build()
	if err != nil {
		fmt.Fprintf(os.Stderr, "error building apiserver: %v\n", err)
		os.Exit(1)
	}

	// 5. Add a custom, non-REST route.
	server.GenericAPIServer.Handler.NonGoRestfulMux.HandleFunc("/custom", func(w http.ResponseWriter, req *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		payload := map[string]string{
			"message": "This is a custom endpoint",
			"time":    time.Now().Format(time.RFC3339),
		}
		// The response may be partly written already, so report the failure
		// on stderr instead of trying to change the status code.
		if err := json.NewEncoder(w).Encode(payload); err != nil {
			fmt.Fprintf(os.Stderr, "error writing /custom response: %v\n", err)
		}
	})

	// 6. Start the server.
	stopCh := genericapiserver.SetupSignalHandler()
	if err := server.GenericAPIServer.PrepareRun().Run(stopCh); err != nil {
		fmt.Fprintf(os.Stderr, "error running apiserver: %v\n", err)
		os.Exit(1)
	}
}
```

### 4.2 对应的APIService定义

```yaml
# Registers group example.com, version v1, with the aggregation layer.
kind: APIService
apiVersion: apiregistration.k8s.io/v1
metadata:
  # Must be "<version>.<group>".
  name: v1.example.com
spec:
  group: example.com
  version: v1
  groupPriorityMinimum: 1000
  versionPriority: 100
  # Backend Service that requests for this group/version are sent to.
  service:
    namespace: kube-system
    name: database-api-service
  # Placeholder: replace with the base64-encoded CA bundle.
  caBundle: <base64-encoded-ca-bundle>
```

## 5. 四者关系与交互流程

```mermaid
sequenceDiagram
participant U as 用户
participant K as kube-apiserver
participant E as etcd
participant C as 控制器
participant O as Operator
participant A as 聚合API服务

U->>K: 创建/更新CRD
K->>E: 存储CRD定义

U->>K: 创建自定义资源(CR)
K->>E: 存储CR数据

C->>K: 监听原生资源变化
K->>C: 推送变更事件
C->>K: 更新资源状态

O->>K: 监听自定义资源变化
K->>O: 推送CR变更事件
O->>K: 创建/更新关联资源
O->>K: 更新CR状态

U->>K: 访问聚合API(/apis/example.com/v1/...)
K->>A: 代理请求
A->>K: 返回响应
```

## 6. 开发工具推荐

1. **Operator开发**:
- Operator SDK (https://sdk.operatorframework.io/)
- Kubebuilder (https://book.kubebuilder.io/)

2. **聚合API开发**:
- apiserver-builder-alpha (已弃用)
- apiserver-runtime (https://github.com/kubernetes-sigs/apiserver-runtime)

3. **调试工具**:
- kube-apiserver日志 (--v=7)
- kubectl get --raw
- ksniff (网络抓包)

## 7. 生产环境注意事项

1. **CRD**:
- 使用验证schema防止非法数据
- 考虑使用finalizer进行资源清理
- 版本升级时做好转换规划

2. **Operator**:
- 优雅地处理控制器重启(Reconcile 逻辑需保持幂等)
- 添加指标暴露和健康检查
- 考虑leader选举机制

3. **聚合API**:
- 实现API优先级和公平性
- 配置适当的RBAC规则
- 考虑API请求限流

4. **性能优化**:
- 使用SharedInformer减少API Server负载
- 实现resync机制处理遗漏事件
- 批量处理更新操作

这些完整示例展示了从简单CRD到复杂Operator和聚合API的实现细节,开发者可以根据实际需求选择适合的扩展方式并参考这些实现模式。

# Kubernetes CRD、Operator、控制器和聚合层详解(Python版)

## 1. Custom Resource Definition (CRD) - Python实现

### 1.1 完整CRD定义(YAML)

```yaml
# database-crd.yaml
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: databases.example.com
spec:
  group: example.com
  versions:
    - name: v1
      served: true    # this version is served by the API
      storage: true   # this version is used for persistence
      schema:
        openAPIV3Schema:   # validation schema
          type: object
          properties:
            spec:
              type: object
              required: [engine, version]   # mandatory fields
              properties:
                engine:
                  type: string
                  enum: [mysql, postgresql, mongodb]   # allowed values
                version:
                  type: string
                  pattern: '^[0-9]+\.[0-9]+(\.[0-9]+)?$'   # version number format
                replicas:
                  type: integer
                  minimum: 1    # minimum replica count
                  maximum: 10   # maximum replica count
            status:   # status fields
              type: object
              properties:
                phase:
                  type: string
                  enum: [Pending, Creating, Running, Failed]
                endpoints:
                  type: array
                  items:
                    type: string
      subresources:
        # Enables the /status endpoint; clients that write status through
        # the status subresource get a 404 without it.
        status: {}
  scope: Namespaced   # either Namespaced or Cluster
  names:
    plural: databases    # plural name
    singular: database   # singular name
    kind: Database       # kind of the resource
    shortNames: ["db"]   # short alias
```

### 1.2 Python客户端操作CRD

```python
from kubernetes import client, config

# Load credentials and cluster details from the local kubeconfig.
config.load_kube_config()

# Client for the generic custom-objects API.
api = client.CustomObjectsApi()

# Manifest of the Database custom resource to submit.
database = dict(
    apiVersion="example.com/v1",
    kind="Database",
    metadata=dict(name="prod-mysql", namespace="default"),
    spec=dict(engine="mysql", version="8.0.26", replicas=3),
)

# Submit the resource and report either success or the API error.
try:
    api.create_namespaced_custom_object(
        "example.com", "v1", "default", "databases", database
    )
except client.exceptions.ApiException as e:
    print(f"Failed to create database: {e}")
else:
    print("Database created successfully")
```

## 2. Operator实现(Python版)

### 2.1 完整Operator代码

```python
import logging
from kubernetes import client, config, watch
from kubernetes.client.rest import ApiException
import time

# Configure logging: INFO level on the root logger, plus a module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class DatabaseOperator:
    """Minimal operator for ``databases.example.com`` custom resources.

    It watches Database objects in all namespaces and, for each one, maintains
    a headless Service and a StatefulSet named after the Database. Both
    children carry an owner reference to the Database, so they are removed by
    garbage collection when the Database is deleted.
    """

    # Default client port of every engine allowed by the CRD's spec.engine enum.
    ENGINE_PORTS = {"mysql": 3306, "postgresql": 5432, "mongodb": 27017}

    # Seconds to wait before re-opening a watch that ended or failed.
    WATCH_RETRY_SECONDS = 5

    def __init__(self):
        # Prefer in-cluster credentials (the operator normally runs as a Pod)
        # and fall back to the local kubeconfig for development.
        try:
            config.load_incluster_config()
        except config.ConfigException:
            config.load_kube_config()

        # API clients
        self.api = client.CustomObjectsApi()
        self.core_v1 = client.CoreV1Api()
        self.apps_v1 = client.AppsV1Api()

        # Coordinates of the custom resource
        self.group = "example.com"
        self.version = "v1"
        self.plural = "databases"

        # Labels put on every object the operator creates
        self.labels = {"app": "database-operator", "created-by": "database-operator"}

    def run(self):
        """Run the operator's main loop; does not return.

        A failing event handler is logged and skipped. A failing watch (for
        example an expired resource version or a dropped connection) is logged
        and the watch is re-opened after ``WATCH_RETRY_SECONDS`` instead of
        terminating the operator.
        """
        logger.info("Starting Database Operator")

        # Make sure prerequisites exist before watching.
        self.ensure_initial_resources()

        while True:
            w = watch.Watch()
            try:
                for event in w.stream(self.api.list_cluster_custom_object,
                                      group=self.group,
                                      version=self.version,
                                      plural=self.plural):
                    try:
                        self.handle_event(event)
                    except Exception as e:
                        logger.error(f"Error handling event: {e}")
            except Exception as e:
                logger.error(f"Watch failed, reconnecting: {e}")
            finally:
                w.stop()
            time.sleep(self.WATCH_RETRY_SECONDS)

    def ensure_initial_resources(self):
        """Hook for creating prerequisites (ClusterRole, ServiceAccount, ...); currently a no-op."""
        pass

    def handle_event(self, event):
        """Dispatch one watch event to the handler for its type.

        ``event`` is the dict yielded by the watch: ``type`` is the event type
        and ``object`` the custom resource as a dict.
        """
        obj = event["object"]
        event_type = event["type"]
        name = obj["metadata"]["name"]
        namespace = obj["metadata"].get("namespace", "default")

        logger.info(f"Handling {event_type} event for {namespace}/{name}")

        if event_type == "ADDED":
            self.handle_creation(obj)
        elif event_type == "MODIFIED":
            self.handle_update(obj)
        elif event_type == "DELETED":
            self.handle_deletion(obj)

    def handle_creation(self, db):
        """Create the Service and StatefulSet for a Database and initialise its status."""
        name = db["metadata"]["name"]
        namespace = db["metadata"].get("namespace", "default")

        # 1. Service
        self.create_service(db)

        # 2. StatefulSet
        self.create_statefulset(db)

        # 3. Status. ADDED is replayed for existing objects whenever the watch
        # (re)starts, so only initialise the phase when none is set yet.
        if not (db.get("status") or {}).get("phase"):
            self.update_status(db, {"phase": "Creating"})

        logger.info(f"Successfully initialized database {namespace}/{name}")

    def handle_update(self, db):
        """Bring the StatefulSet's replica count in line with ``spec.replicas``.

        If the StatefulSet does not exist, the Service and StatefulSet are
        (re)created instead.
        """
        name = db["metadata"]["name"]
        namespace = db["metadata"].get("namespace", "default")
        replicas = db.get("spec", {}).get("replicas", 1)

        try:
            self.apps_v1.patch_namespaced_stateful_set(
                name, namespace, {"spec": {"replicas": replicas}}
            )
            logger.info(f"Reconciled StatefulSet for {namespace}/{name}")
        except ApiException as e:
            if e.status != 404:
                raise
            self.create_service(db)
            self.create_statefulset(db)

    def handle_deletion(self, db):
        """Handle a deleted Database.

        Nothing is removed explicitly: the Service and StatefulSet carry an
        owner reference to the Database.
        """
        name = db["metadata"]["name"]
        namespace = db["metadata"].get("namespace", "default")
        logger.info(f"Database {namespace}/{name} deleted")

    def create_service(self, db):
        """Create the headless Service for a Database; an existing Service (409) is left as is."""
        name = db["metadata"]["name"]
        namespace = db["metadata"].get("namespace", "default")
        engine = db.get("spec", {}).get("engine", "mysql")
        port = self.ENGINE_PORTS.get(engine, 3306)

        service = client.V1Service(
            metadata=client.V1ObjectMeta(
                name=name,
                namespace=namespace,
                labels=self.labels,
                owner_references=[self.get_owner_ref(db)]
            ),
            spec=client.V1ServiceSpec(
                selector={"app": name},
                ports=[client.V1ServicePort(
                    port=port,
                    target_port=port,
                    name=engine
                )],
                cluster_ip="None"
            )
        )

        try:
            self.core_v1.create_namespaced_service(namespace, service)
            logger.info(f"Created Service for {namespace}/{name}")
        except ApiException as e:
            if e.status == 409:  # already exists
                logger.info(f"Service for {namespace}/{name} already exists")
            else:
                raise

    def create_statefulset(self, db):
        """Create the StatefulSet for a Database; an existing StatefulSet (409) is left as is."""
        name = db["metadata"]["name"]
        namespace = db["metadata"].get("namespace", "default")
        engine = db["spec"]["engine"]
        version = db["spec"]["version"]
        # replicas is optional in the CRD schema; fall back to a single replica.
        replicas = db["spec"].get("replicas", 1)

        # Container definition
        # NOTE(review): hard-coded root password, and the variable is only
        # meaningful for the mysql image - source it from a Secret per engine.
        container = client.V1Container(
            name=engine,
            image=f"{engine}:{version}",
            ports=[client.V1ContainerPort(
                container_port=self.ENGINE_PORTS.get(engine, 3306)
            )],
            env=[
                client.V1EnvVar(name="MYSQL_ROOT_PASSWORD", value="password")
            ]
        )

        # StatefulSet definition
        sts = client.V1StatefulSet(
            metadata=client.V1ObjectMeta(
                name=name,
                namespace=namespace,
                labels=self.labels,
                owner_references=[self.get_owner_ref(db)]
            ),
            spec=client.V1StatefulSetSpec(
                service_name=name,
                replicas=replicas,
                selector=client.V1LabelSelector(
                    match_labels={"app": name}
                ),
                template=client.V1PodTemplateSpec(
                    metadata=client.V1ObjectMeta(labels={"app": name}),
                    spec=client.V1PodSpec(
                        containers=[container]
                    )
                )
            )
        )

        try:
            self.apps_v1.create_namespaced_stateful_set(namespace, sts)
            logger.info(f"Created StatefulSet for {namespace}/{name}")
        except ApiException as e:
            if e.status == 409:  # already exists
                logger.info(f"StatefulSet for {namespace}/{name} already exists")
            else:
                raise

    def update_status(self, db, status):
        """Replace the ``status`` of a Database with ``status`` through the status subresource."""
        name = db["metadata"]["name"]
        namespace = db["metadata"].get("namespace", "default")

        # Read the current object so the replace carries its resourceVersion.
        current = self.api.get_namespaced_custom_object(
            group=self.group,
            version=self.version,
            namespace=namespace,
            plural=self.plural,
            name=name
        )

        current["status"] = status

        self.api.replace_namespaced_custom_object_status(
            group=self.group,
            version=self.version,
            namespace=namespace,
            plural=self.plural,
            name=name,
            body=current
        )

    def get_owner_ref(self, db):
        """Return the controller owner reference that ties a child object to ``db``."""
        return client.V1OwnerReference(
            api_version="example.com/v1",
            kind="Database",
            name=db["metadata"]["name"],
            uid=db["metadata"]["uid"],
            controller=True,
            # The keyword is block_owner_deletion; "block_deletion" is not a
            # V1OwnerReference parameter and raises TypeError.
            block_owner_deletion=True
        )

# Script entry point: build the operator and run its loop.
if __name__ == "__main__":
    DatabaseOperator().run()
```

## 3. 控制器实现(Python版)

### 3.1 自定义控制器示例

```python
import logging
from kubernetes import client, config, watch
from kubernetes.client.rest import ApiException
import time

# Configure logging: INFO level on the root logger, plus a module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class PodLabelController:
    """Controller that stamps every Pod in the cluster with a fixed set of labels.

    It watches Pods in all namespaces and patches the labels in
    ``labels_to_add`` onto each Pod that does not carry them yet.
    """

    # Seconds to wait before re-opening a watch that ended or failed.
    WATCH_RETRY_SECONDS = 5

    def __init__(self):
        # Prefer in-cluster credentials and fall back to the local kubeconfig.
        try:
            config.load_incluster_config()
        except config.ConfigException:
            config.load_kube_config()
        self.core_v1 = client.CoreV1Api()
        self.watch = watch.Watch()

        # Labels to add; the timestamp is fixed once, at controller start-up.
        self.labels_to_add = {
            "managed-by": "pod-label-controller",
            "injected-at": self._label_timestamp()
        }

    @staticmethod
    def _label_timestamp():
        """Return the current UTC time in a form that is a valid label value.

        Label values may only contain alphanumerics, '-', '_' and '.', so the
        ':' separators of an ISO-8601 timestamp would make the API server
        reject the patch; the compact basic format is used instead.
        """
        return time.strftime("%Y%m%dT%H%M%SZ", time.gmtime())

    def run(self):
        """Run the controller's main loop; does not return.

        A failing event handler is logged and skipped. A failing watch is
        logged and re-opened after ``WATCH_RETRY_SECONDS`` instead of
        terminating the controller.
        """
        logger.info("Starting Pod Label Controller")

        while True:
            # Fresh Watch per attempt so a stale resource version is dropped.
            self.watch = watch.Watch()
            try:
                for event in self.watch.stream(self.core_v1.list_pod_for_all_namespaces):
                    try:
                        self.handle_event(event)
                    except Exception as e:
                        logger.error(f"Error handling event: {e}")
            except Exception as e:
                logger.error(f"Watch failed, reconnecting: {e}")
            finally:
                self.watch.stop()
            time.sleep(self.WATCH_RETRY_SECONDS)

    def handle_event(self, event):
        """Dispatch one watch event; only ADDED and MODIFIED are acted upon."""
        pod = event["object"]
        event_type = event["type"]
        name = pod.metadata.name
        namespace = pod.metadata.namespace

        logger.debug(f"Handling {event_type} event for pod {namespace}/{name}")

        if event_type == "ADDED":
            self.handle_pod_creation(pod)
        elif event_type == "MODIFIED":
            self.handle_pod_update(pod)

    def handle_pod_creation(self, pod):
        """Label a Pod unless it already carries all required labels."""
        name = pod.metadata.name
        namespace = pod.metadata.namespace

        # Skip Pods that were processed before.
        if self.has_required_labels(pod):
            logger.debug(f"Pod {namespace}/{name} already has required labels")
            return

        self.add_labels_to_pod(pod)

    def handle_pod_update(self, pod):
        """Handle a modified Pod exactly like a new one."""
        self.handle_pod_creation(pod)

    def has_required_labels(self, pod):
        """Return True when every entry of ``labels_to_add`` is present on the Pod with the same value."""
        if not pod.metadata.labels:
            return False

        for key, value in self.labels_to_add.items():
            if pod.metadata.labels.get(key) != value:
                return False

        return True

    def add_labels_to_pod(self, pod):
        """Patch ``labels_to_add`` onto the Pod; API errors are logged, not raised."""
        name = pod.metadata.name
        namespace = pod.metadata.namespace

        # Patch body containing only the labels.
        patch = {
            "metadata": {
                "labels": self.labels_to_add
            }
        }

        try:
            # Patch instead of update to avoid write conflicts.
            self.core_v1.patch_namespaced_pod(
                name=name,
                namespace=namespace,
                body=patch
            )
            logger.info(f"Successfully labeled pod {namespace}/{name}")
        except ApiException as e:
            logger.error(f"Failed to label pod {namespace}/{name}: {e}")

# Script entry point: build the controller and run its loop.
if __name__ == "__main__":
    PodLabelController().run()
```

## 4. 聚合API实现(Python版)

### 4.1 使用Flask实现聚合API服务

```python
from flask import Flask, jsonify, request
import json
from kubernetes import client, config
from functools import wraps
import logging

# Flask application that serves the aggregated API routes defined below.
app = Flask(__name__)

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# In-memory stand-in for a database store, keyed by "<namespace>/<name>"
databases = {}

def kubernetes_required(f):
    """Decorator that rejects requests carrying no caller identity with 401.

    A request is accepted when it has either an ``X-Remote-User`` header (how
    kube-apiserver identifies the caller when it proxies a request to an
    aggregated API; the original ``Authorization`` header is not forwarded)
    or a ``Bearer`` ``Authorization`` header (direct access).

    NOTE(review): neither header is verified here. In production the proxy
    client certificate must be validated before ``X-Remote-User`` is trusted.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        remote_user = request.headers.get('X-Remote-User')
        auth_header = request.headers.get('Authorization', '')
        if not remote_user and not auth_header.startswith('Bearer '):
            return jsonify({"message": "Unauthorized"}), 401
        return f(*args, **kwargs)
    return decorated_function

@app.route('/apis/example.com/v1/namespaces/<namespace>/databases', methods=['GET'])
@kubernetes_required
def list_databases(namespace):
    """Return a DatabaseList holding every stored database of ``namespace``."""
    logger.info(f"Listing databases in namespace {namespace}")

    # Keep only the entries whose metadata names the requested namespace.
    items = []
    for entry in databases.values():
        if entry["metadata"]["namespace"] == namespace:
            items.append(entry)

    body = {
        "apiVersion": "example.com/v1",
        "kind": "DatabaseList",
        "items": items
    }
    return jsonify(body)

@app.route('/apis/example.com/v1/namespaces/<namespace>/databases/<name>', methods=['GET'])
@kubernetes_required
def get_database(namespace, name):
    """Return the stored database ``namespace``/``name``, or 404 when there is none."""
    logger.info(f"Getting database {namespace}/{name}")

    try:
        stored = databases[f"{namespace}/{name}"]
    except KeyError:
        return jsonify({"message": "Database not found"}), 404

    return jsonify(stored)

@app.route('/apis/example.com/v1/namespaces/<namespace>/databases', methods=['POST'])
@kubernetes_required
def create_database(namespace):
    """Store a new database in ``namespace``.

    Responses:
        201 with the stored object on success;
        400 when the body is not a JSON object, ``metadata.name`` is missing,
            or ``metadata.namespace`` contradicts the namespace in the URL;
        409 when a database with that name already exists in the namespace.
    """
    # silent=True yields None for a missing or malformed body instead of raising.
    data = request.get_json(silent=True)
    logger.info(f"Creating database in namespace {namespace}: {data}")

    if not isinstance(data, dict):
        return jsonify({"message": "Request body must be a JSON object"}), 400

    # Validate required fields
    metadata = data.get("metadata")
    if not isinstance(metadata, dict) or "name" not in metadata:
        return jsonify({"message": "metadata.name is required"}), 400

    # The object is stored under the URL namespace but listed by its
    # metadata.namespace, so the two must agree.
    if metadata.get("namespace", namespace) != namespace:
        return jsonify({"message": "metadata.namespace does not match the request namespace"}), 400

    name = metadata["name"]
    key = f"{namespace}/{name}"

    if key in databases:
        return jsonify({"message": "Database already exists"}), 409

    # Fill in defaults
    data.setdefault("apiVersion", "example.com/v1")
    data.setdefault("kind", "Database")
    metadata["namespace"] = namespace

    # Store the database
    databases[key] = data

    return jsonify(data), 201

@app.route('/apis/example.com/v1', methods=['GET'])
def get_api_versions():
    """Serve the discovery document for group/version ``example.com/v1``.

    Clients and the aggregation layer expect an ``APIResourceList`` at
    ``/apis/<group>/<version>`` that names the resources served here. The
    verbs listed match the routes this server implements.
    """
    return jsonify({
        "kind": "APIResourceList",
        "apiVersion": "v1",
        "groupVersion": "example.com/v1",
        "resources": [
            {
                "name": "databases",
                "singularName": "database",
                "namespaced": True,
                "kind": "Database",
                "verbs": ["create", "get", "list"],
                "shortNames": ["db"]
            }
        ]
    })

if __name__ == '__main__':
    # Load kubeconfig (demo only) and log the cluster version as a
    # connectivity check; a failure is logged as a warning and start-up continues.
    try:
        config.load_kube_config()
        # get_code() belongs to VersionApi; CoreV1Api has no such method.
        k8s_client = client.VersionApi()

        version = k8s_client.get_code()
        logger.info(f"Connected to Kubernetes cluster: {version.git_version}")
    except Exception as e:
        logger.warning(f"Could not connect to Kubernetes: {e}")

    # Start the API server with an ad-hoc self-signed certificate.
    app.run(host='0.0.0.0', port=8080, ssl_context='adhoc')
```

### 4.2 对应的APIService定义

```yaml
# Registers group example.com, version v1, with the aggregation layer.
kind: APIService
apiVersion: apiregistration.k8s.io/v1
metadata:
  name: v1.example.com
spec:
  group: example.com
  version: v1
  groupPriorityMinimum: 1000
  versionPriority: 100
  service:
    namespace: kube-system
    name: database-api-service   # the matching Service must be created beforehand
  insecureSkipTLSVerify: true    # use a valid certificate in production
```

## 5. 四者对比总结

| 组件 | 主要用途 | Python实现复杂度 | 典型用例 | 关键特点 |
|--------------|---------------------------------|----------------|----------------------------|----------------------------|
| **CRD** | 定义新的资源类型 | 低 | 自定义配置存储 | 只需YAML定义,无需编程 |
| **Operator** | 管理复杂有状态应用 | 高 | 数据库、消息队列等 | 包含业务逻辑和控制循环 |
| **控制器** | 维护系统资源状态 | 中 | 标签管理、资源清理 | 专注于特定资源的生命周期管理 |
| **聚合API** | 提供自定义API端点 | 高 | 指标收集、扩展功能 | 完全控制API行为和响应格式 |

## 6. 生产环境建议

1. **Operator开发**:
- 使用`kopf`框架简化Python Operator开发
```bash
pip install kopf
```
- 示例kopf Operator:
```python
import kopf

@kopf.on.create('example.com', 'v1', 'databases')
def create_fn(spec, name, namespace, **kwargs):
    """Creation handler for Database objects: print the new object and return a message dict."""
    print("Database created: {}/{} with spec: {}".format(namespace, name, spec))
    return dict(message='Database creation initiated')
```

2. **错误处理**:
- 为所有Kubernetes API调用添加重试逻辑
- 实现优雅的终止处理

3. **性能优化**:
- 使用`resource_version`进行高效监听
- 批量处理事件减少API调用

4. **安全考虑**:
- 为聚合API实现严格的RBAC控制
- 使用服务账户令牌验证请求

这些Python实现提供了从简单到复杂的Kubernetes扩展方案,开发者可以根据具体需求选择合适的扩展方式。

posted on 2025-06-12 01:22  吃草的青蛙  阅读(97)  评论(0)    收藏  举报

导航