weave-npc 介绍(适配k8s v1.6)
Weave-npc使用iptables来生效network policy,由于network policy在使用namespace selector、pod selector会有匹配多个pod,所以使用linux ipset功能来批量生效iptables规则,如下所示:
-A WEAVE-NPC-INGRESS -p tcp -m set --match-set weave-?!9~5$SRrA]EcONWS|ZaRex{v src -m set --match-set weave-*J?Cn/g0cU{zEyOeo;P6+_DD2 dst -m tcp --dport 80 -j ACCEPT
其中ipset是”weave-“为前缀,后面是sha1处理过字符串。
Weave有按照如下方式初始化iptables规则。Network policy中ingress规则会在weave-npc-ingress链中添加规则(白名单),所以如果没有匹配默认规则和白名单规则的话报文就会被drop掉。
-A FORWARD -o weave -j WEAVE-NPC
-A FORWARD -o weave -m state --state NEW -j NFLOG --nflog-group 86
-A FORWARD -o weave -j DROP
… …
-A WEAVE-NPC -m state --state RELATED,ESTABLISHED -j ACCEPT
-A WEAVE-NPC -d 224.0.0.0/4 -j ACCEPT
-A WEAVE-NPC -m state --state NEW -j WEAVE-NPC-DEFAULT
-A WEAVE-NPC -m state --state NEW -j WEAVE-NPC-INGRESS
-A WEAVE-NPC -m set ! --match-set weave-local-pods dst -j ACCEPT
func root(cmd *cobra.Command, args []string) {
common.SetLogLevel(logLevel)
//用于标识主机,产生本地的规则
if nodeName == "" {
// HOSTNAME is set by Kubernetes for pods in the host network namespace
nodeName = os.Getenv("HOSTNAME")
}
if nodeName == "" {
common.Log.Fatalf("Must set node name via --node-name or $HOSTNAME")
}
common.Log.Infof("Starting Weaveworks NPC %s; node name %q", version, nodeName)
if err := metrics.Start(metricsAddr); err != nil {
common.Log.Fatalf("Failed to start metrics: %v", err)
}
if err := ulogd.Start(); err != nil {
common.Log.Fatalf("Failed to start ulogd: %v", err)
}
config, err := rest.InClusterConfig()
handleError(err)
client, err := kubernetes.NewForConfig(config)
handleError(err)
// 创建iptables 对象,用于管理iptables规则及生效
ipt, err := iptables.New()
handleError(err)
// 创建ipeset对象,用于管理ipset资源
ips := ipset.New(common.LogLogger())
// resetIPTables在filter表添加WEAVE-NPC-INGRESS链、WEAVE-NPC-DEFAULT链、
// WEAVE-NPC链,若已经存在则置空(FLUSH)
handleError(resetIPTables(ipt))
// resetIPSets 将weave-npc创建的ipset,即名字为”weave-“开头的ipset的成员删除
handleError(resetIPSets(ips))
// createBaseRules初始化iptables规则
// weave-npc链中添加iptables规则
// -A WEAVE-NPC -m state --state RELATED,ESTABLISHED -j ACCEPT
// -A WEAVE-NPC -d 224.0.0.0/4 -j ACCEPT
// -A WEAVE-NPC -m state --state NEW -j WEAVE-NPC-DEFAULT
// -A WEAVE-NPC -m state --state NEW -j WEAVE-NPC-INGRESS
// -A WEAVE-NPC -m set ! --match-set weave-local-pods dst -j ACCEPT
handleError(createBaseRules(ipt, ips))
npc := npc.New(nodeName, ipt, ips)
// 下面起了三个controller,利用client-go的informer来分别负责namespaces、pods和
// networkpolicies的创改删处理
nsController := makeController(client.Core().RESTClient(), "namespaces", &coreapi.Namespace{},
cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
handleError(npc.AddNamespace(obj.(*coreapi.Namespace)))
},
DeleteFunc: func(obj interface{}) {
switch obj := obj.(type) {
case *coreapi.Namespace:
handleError(npc.DeleteNamespace(obj))
case cache.DeletedFinalStateUnknown:
// We know this object has gone away, but its final state is no longer
// available from the API server. Instead we use the last copy of it
// that we have, which is good enough for our cleanup.
handleError(npc.DeleteNamespace(obj.Obj.(*coreapi.Namespace)))
}
},
UpdateFunc: func(old, new interface{}) {
handleError(npc.UpdateNamespace(old.(*coreapi.Namespace), new.(*coreapi.Namespace)))
}})
podController := makeController(client.Core().RESTClient(), "pods", &coreapi.Pod{},
cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
handleError(npc.AddPod(obj.(*coreapi.Pod)))
},
DeleteFunc: func(obj interface{}) {
switch obj := obj.(type) {
case *coreapi.Pod:
handleError(npc.DeletePod(obj))
case cache.DeletedFinalStateUnknown:
// We know this object has gone away, but its final state is no longer
// available from the API server. Instead we use the last copy of it
// that we have, which is good enough for our cleanup.
handleError(npc.DeletePod(obj.Obj.(*coreapi.Pod)))
}
},
UpdateFunc: func(old, new interface{}) {
handleError(npc.UpdatePod(old.(*coreapi.Pod), new.(*coreapi.Pod)))
}})
npController := makeController(client.Extensions().RESTClient(), "networkpolicies", &extnapi.NetworkPolicy{},
cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
handleError(npc.AddNetworkPolicy(obj.(*extnapi.NetworkPolicy)))
},
DeleteFunc: func(obj interface{}) {
switch obj := obj.(type) {
case *extnapi.NetworkPolicy:
handleError(npc.DeleteNetworkPolicy(obj))
case cache.DeletedFinalStateUnknown:
// We know this object has gone away, but its final state is no longer
// available from the API server. Instead we use the last copy of it
// that we have, which is good enough for our cleanup.
handleError(npc.DeleteNetworkPolicy(obj.Obj.(*extnapi.NetworkPolicy)))
}
},
UpdateFunc: func(old, new interface{}) {
handleError(npc.UpdateNetworkPolicy(old.(*extnapi.NetworkPolicy), new.(*extnapi.NetworkPolicy)))
}})
go nsController.Run(wait.NeverStop)
go podController.Run(wait.NeverStop)
go npController.Run(wait.NeverStop)
signals := make(chan os.Signal, 1)
signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
common.Log.Fatalf("Exiting: %v", <-signals)
}
network policy是namespace scoped,所以资源的处理都在namespace的基础上去实现的。
func (npc *controller) AddNamespace(obj *coreapi.Namespace) error {
npc.Lock()
defer npc.Unlock()
common.Log.Infof("EVENT AddNamespace %s", js(obj))
return npc.withNS(obj.ObjectMeta.Name, func(ns *ns) error {
return errors.Wrap(ns.addNamespace(obj), "add namespace")
})
}
一个namespace会有一个podselectorSet,其中entry中包含多个selector,每对一个pod spec作provision的时候,若此pod spec之前没有处理过则会创建一个seletor,并将通过onNewPodSelector将匹配的pod ip加到selector的ipset中去。
func newNS(name, nodeName string, ipt iptables.Interface, ips ipset.Interface, nsSelectors *selectorSet) (*ns, error) {
allPods, err := newSelectorSpec(&metav1.LabelSelector{}, name, ipset.HashIP)
if err != nil {
return nil, err
}
ns := &ns{
ipt: ipt,
ips: ips,
name: name,
nodeName: nodeName,
pods: make(map[types.UID]*coreapi.Pod),
policies: make(map[types.UID]*extnapi.NetworkPolicy),
uid: uuid.NewUUID(),
allPods: allPods,
nsSelectors: nsSelectors,
rules: newRuleSet(ipt)}
ns.podSelectors = newSelectorSet(ips, ns.onNewPodSelector)
// 对allpods创建selector,因为allpods的spec是{},即选择所有的pod,因此allpods对应的selector的ipset中包含了该namespace下所有的pod。
if err := ns.podSelectors.provision(ns.uid, nil, map[string]*selectorSpec{ns.allPods.key: ns.allPods}); err != nil {
return nil, err
}
return ns, nil
}
创建namespace
k8s v1.7之前 network policy不支持default deny,default deny是使用annotation的方式实现的,yaml如下:
kind: Namespace
apiVersion: v1
metadata:
name: myns
annotations:
net.beta.kubernetes.io/network-policy: |
{
"ingress": {
"isolation": "DefaultDeny"
}
}
ipsetName为weave-$(sha1_on_namespacename)
-A WEAVE-NPC-DEFAULT -m set --match-set weave-$(sha1_on_namespacename) dst -j ACCEPT –m comment –comment “DefaultAllow isolation for namespace: $(namespace_name)“
weave默认是drop的,见如下红色。所以如果没有weave-npc中匹配接受包就会drop掉。
-A FORWARD -o weave -j WEAVE-NPC
-A FORWARD -o weave -m state --state NEW -j NFLOG --nflog-group 86
-A FORWARD -o weave -j DROP
func (ns *ns) addNamespace(obj *coreapi.Namespace) error {
ns.namespace = obj
if !isDefaultDeny(obj) {
if err := ns.ensureBypassRule(ns.allPods.ipsetName); err != nil {
return err
}
}
// Add namespace ipset to matching namespace selectors
// 遍历所有的namespace selector,若某个namespace selector与刚添加的namespace label匹配,则将此namespace添加到该selector对应的ipset下。
return ns.nsSelectors.addToMatching(obj.ObjectMeta.Labels, string(ns.allPods.ipsetName), namespaceComment(ns))
更新namesapce
func (ns *ns) updateNamespace(oldObj, newObj *coreapi.Namespace) error {
ns.namespace = newObj
// Update bypass rule if ingress default has changed
oldDefaultDeny := isDefaultDeny(oldObj)
newDefaultDeny := isDefaultDeny(newObj)
// 对比新旧使用deaultdeny的用法,选择删除或者加上by pass的规则
if oldDefaultDeny != newDefaultDeny {
common.Log.Infof("namespace DefaultDeny changed from %t to %t", oldDefaultDeny, newDefaultDeny)
if oldDefaultDeny {
if err := ns.ensureBypassRule(ns.allPods.ipsetName); err != nil {
return err
}
}
if newDefaultDeny {
if err := ns.deleteBypassRule(ns.allPods.ipsetName); err != nil {
return err
}
}
}
// Re-evaluate namespace selector membership if labels have changed
if !equals(oldObj.ObjectMeta.Labels, newObj.ObjectMeta.Labels) {
for _, selector := range ns.nsSelectors.entries {
oldMatch := selector.matches(oldObj.ObjectMeta.Labels)
newMatch := selector.matches(newObj.ObjectMeta.Labels)
if oldMatch == newMatch {
continue
}
if oldMatch {
if err := selector.delEntry(string(ns.allPods.ipsetName)); err != nil {
return err
}
}
if newMatch {
if err := selector.addEntry(string(ns.allPods.ipsetName), namespaceComment(ns)); err != nil {
return err
}
}
}
}
return nil
}
删除namespace,操作与增加相反,删除bypass rule规则,将namespace从匹配的 nsselector中删除
func (ns *ns) deleteNamespace(obj *coreapi.Namespace) error {
ns.namespace = nil
// Remove bypass rule
if !isDefaultDeny(obj) {
if err := ns.deleteBypassRule(ns.allPods.ipsetName); err != nil {
return err
}
}
// Remove namespace ipset from any matching namespace selectors
return ns.nsSelectors.delFromMatching(obj.ObjectMeta.Labels, string(ns.allPods.ipsetName))
}
创建pod
func (ns *ns) addPod(obj *coreapi.Pod) error {
ns.pods[obj.ObjectMeta.UID] = obj
// 若pod不是运行态或者使用的HostNetwork,则返回
if !hasIP(obj) {
return nil
}
//若pod属于本机,则添加到weave-local-pods ipset中
if ns.checkLocalPod(obj) {
ns.ips.AddEntry(LocalIpset, obj.Status.PodIP, podComment(obj))
}
// 若pod label与pod selector匹配,则将此pod ip添加pod selector对应的ipset
// allPodspec由于每个pod都会匹配,所以所有新增的pod都会加入allpod ipset中。
return ns.podSelectors.addToMatching(obj.ObjectMeta.Labels, obj.Status.PodIP, podComment(obj))
}
// 当pod有无状态切换时,从local ipset中添加或者删除;label切换时,根据匹配关系从pod selector ipset中添加或者删除
func (ns *ns) updatePod(oldObj, newObj *coreapi.Pod) error {
delete(ns.pods, oldObj.ObjectMeta.UID)
ns.pods[newObj.ObjectMeta.UID] = newObj
if !hasIP(oldObj) && !hasIP(newObj) {
return nil
}
if hasIP(oldObj) && !hasIP(newObj) {
if ns.checkLocalPod(oldObj) {
ns.ips.DelEntry(LocalIpset, oldObj.Status.PodIP)
}
return ns.podSelectors.delFromMatching(oldObj.ObjectMeta.Labels, oldObj.Status.PodIP)
}
if !hasIP(oldObj) && hasIP(newObj) {
if ns.checkLocalPod(newObj) {
ns.ips.AddEntry(LocalIpset, newObj.Status.PodIP, podComment(newObj))
}
return ns.podSelectors.addToMatching(newObj.ObjectMeta.Labels, newObj.Status.PodIP, podComment(newObj))
}
if !equals(oldObj.ObjectMeta.Labels, newObj.ObjectMeta.Labels) ||
oldObj.Status.PodIP != newObj.Status.PodIP {
for _, ps := range ns.podSelectors.entries {
oldMatch := ps.matches(oldObj.ObjectMeta.Labels)
newMatch := ps.matches(newObj.ObjectMeta.Labels)
if oldMatch == newMatch && oldObj.Status.PodIP == newObj.Status.PodIP {
continue
}
if oldMatch {
if err := ps.delEntry(oldObj.Status.PodIP); err != nil {
return err
}
}
if newMatch {
if err := ps.addEntry(newObj.Status.PodIP, podComment(newObj)); err != nil {
return err
}
}
}
}
return nil
}
从ipset weave-local-pods中删除该pod ip,并从匹配的pod selector ipset中删除
func (ns *ns) deletePod(obj *coreapi.Pod) error {
delete(ns.pods, obj.ObjectMeta.UID)
if !hasIP(obj) {
return nil
}
if ns.checkLocalPod(obj) {
ns.ips.DelEntry(LocalIpset, obj.Status.PodIP)
}
return ns.podSelectors.delFromMatching(obj.ObjectMeta.Labels, obj.Status.PodIP)
}
func (ns *ns) addNetworkPolicy(obj *extnapi.NetworkPolicy) error {
ns.policies[obj.ObjectMeta.UID] = obj
// Analyse policy, determine which rules and ipsets are required
rules, nsSelectors, podSelectors, err := ns.analysePolicy(obj)
if err != nil {
return err
}
// Provision required resources in dependency order
if err := ns.nsSelectors.provision(obj.ObjectMeta.UID, nil, nsSelectors); err != nil {
return err
}
if err := ns.podSelectors.provision(obj.ObjectMeta.UID, nil, podSelectors); err != nil {
return err
}
return ns.rules.provision(obj.ObjectMeta.UID, nil, rules)
}
func (ns *ns) addNetworkPolicy(obj *extnapi.NetworkPolicy) error {
ns.policies[obj.ObjectMeta.UID] = obj
// Analyse policy, determine which rules and ipsets are required
rules, nsSelectors, podSelectors, err := ns.analysePolicy(obj)
if err != nil {
return err
}
// Provision required resources in dependency order
if err := ns.nsSelectors.provision(obj.ObjectMeta.UID, nil, nsSelectors); err != nil {
return err
}
if err := ns.podSelectors.provision(obj.ObjectMeta.UID, nil, podSelectors); err != nil {
return err
}
return ns.rules.provision(obj.ObjectMeta.UID, nil, rules)
}
func (ns *ns) analysePolicy(policy *extnapi.NetworkPolicy) (
rules map[string]*ruleSpec,
nsSelectors, podSelectors map[string]*selectorSpec,
err error) {
nsSelectors = make(map[string]*selectorSpec)
podSelectors = make(map[string]*selectorSpec)
rules = make(map[string]*ruleSpec)
// pod selector 匹配目的pod
dstSelector, err := newSelectorSpec(&policy.Spec.PodSelector, ns.name, ipset.HashIP)
if err != nil {
return nil, nil, nil, err
}
podSelectors[dstSelector.key] = dstSelector
for _, ingressRule := range policy.Spec.Ingress {
// 如果ports或者from 不为空,但是没有内容,表示无报文可以匹配
// ingress:
// - ports:[]
if ingressRule.Ports != nil && len(ingressRule.Ports) == 0 {
// Ports is empty, this rule matches no ports (no traffic matches).
continue
}
// ingress:
// - from: []
// 如果ports或者from 不为空,但是没有内容,表示无报文可以匹配
if ingressRule.From != nil && len(ingressRule.From) == 0 {
// From is empty, this rule matches no sources (no traffic matches).
continue
}
if ingressRule.From == nil {
// From is not provided, this rule matches all sources (traffic not restricted by source).
if ingressRule.Ports == nil {
// Ports is not provided, this rule matches all ports (traffic not restricted by port).
rule := newRuleSpec(nil, nil, dstSelector, nil)
rules[rule.key] = rule
} else {
// Ports is present and contains at least one item, then this rule allows traffic
// only if the traffic matches at least one port in the ports list.
withNormalisedProtoAndPort(ingressRule.Ports, func(proto, port string) {
rule := newRuleSpec(&proto, nil, dstSelector, &port)
rules[rule.key] = rule
})
}
} else {
// From is present and contains at least on item, this rule allows traffic only if the
// traffic matches at least one item in the from list.
// (ingress.From)networkpolicyPeer在1.8版本中支持ipblock
// ingress:
// - from:
// - namespaceSelector:
// matchLabels:
// project: myproject
// - podSelector:
// matchLabels:
// role: frontend
// ports:
// - protocol: TCP
// port: 6379
for _, peer := range ingressRule.From {
var srcSelector *selectorSpec
if peer.PodSelector != nil {
srcSelector, err = newSelectorSpec(peer.PodSelector, ns.name, ipset.HashIP)
if err != nil {
return nil, nil, nil, err
}
podSelectors[srcSelector.key] = srcSelector
}
if peer.NamespaceSelector != nil {
srcSelector, err = newSelectorSpec(peer.NamespaceSelector, "", ipset.ListSet)
if err != nil {
return nil, nil, nil, err
}
nsSelectors[srcSelector.key] = srcSelector
}
// 在前面两步选择了selector之后选择受限访问的port
// 这里有个问题,如果namespace selector和pod selector都有的话,
// 应该是有两个match set,而不是像下面这样只使用最后一个。
if ingressRule.Ports == nil {
// Ports is not provided, this rule matches all ports (traffic not restricted by port).
rule := newRuleSpec(nil, srcSelector, dstSelector, nil)
rules[rule.key] = rule
} else {
// Ports is present and contains at least one item, then this rule allows traffic
// only if the traffic matches at least one port in the ports list.
withNormalisedProtoAndPort(ingressRule.Ports, func(proto, port string) {
rule := newRuleSpec(&proto, srcSelector, dstSelector, &port)
rules[rule.key] = rule
})
}
}
}
}
return rules, nsSelectors, podSelectors, nil
}
根据network policy规则翻译为iptables规则。weave最后有一条默认drop规则,规则都是以白名单的创建,也就是-j ACCPET
-p $(proto) –m set –match-set $(src_ipset) src –m set –match-set $(src_ipset) dst --dport $(dstPort) -j ACCPET
func newRuleSpec(proto *string, srcHost *selectorSpec, dstHost *selectorSpec, dstPort *string) *ruleSpec {
args := []string{}
if proto != nil {
args = append(args, "-p", *proto)
}
srcComment := "anywhere"
if srcHost != nil {
args = append(args, "-m", "set", "--match-set", string(srcHost.ipsetName), "src")
if srcHost.nsName != "" {
srcComment = fmt.Sprintf("pods: namespace: %s, selector: %s", srcHost.nsName, srcHost.key)
} else {
srcComment = fmt.Sprintf("namespaces: selector: %s", srcHost.key)
}
}
dstComment := "anywhere"
if dstHost != nil {
args = append(args, "-m", "set", "--match-set", string(dstHost.ipsetName), "dst")
dstComment = fmt.Sprintf("pods: namespace: %s, selector: %s", dstHost.nsName, dstHost.key)
}
if dstPort != nil {
args = append(args, "--dport", *dstPort)
}
args = append(args, "-j", "ACCEPT")
args = append(args, "-m", "comment", "--comment", fmt.Sprintf("%s -> %s", srcComment, dstComment))
key := strings.Join(args, " ")
return &ruleSpec{key, args}
}
参考:
Implement Kubernetes 1.7 NetworkPolicy semantics https://github.com/weaveworks/weave/pull/3151
浙公网安备 33010602011771号