PF_PACKET && tcpdump

How packet capture works on Linux

Packet capture on Linux is implemented by registering a virtual low-level network protocol, which gains the right to process frames arriving on a network device. When a NIC receives a frame, the kernel walks all registered network protocols; since the capture module has disguised itself as one of these protocols, the kernel gives it a chance to handle each frame the NIC received. The module takes that opportunity to snoop on the frame: it makes a complete copy, as if the packet had been destined for itself, and hands that copy to the capture application.

When the driver passes a received frame up to the kernel protocol stack, the frame runs through the ptype_all protocol hooks, which get a chance to inspect it:

static int __netif_receive_skb(struct sk_buff *skb)
{
    /* ... */
    /* Deliver the skb to every ETH_P_ALL handler (this is the list
     * tcpdump's hook sits on) before normal protocol dispatch. */
    list_for_each_entry_rcu(ptype, &ptype_all, list) {
        if (!ptype->dev || ptype->dev == skb->dev) {
            if (pt_prev)
                ret = deliver_skb(skb, pt_prev, orig_dev);
            pt_prev = ptype;
        }
    }
    /* ... */
}

The corresponding registration path is:

/*******************************************************************************

        Protocol management and registration routines

*******************************************************************************/

/*
 *    Add a protocol ID to the list. Now that the input handler is
 *    smarter we can dispense with all the messy stuff that used to be
 *    here.
 *
 *    BEWARE!!! Protocol handlers, mangling input packets,
 *    MUST BE last in hash buckets and checking protocol handlers
 *    MUST start from promiscuous ptype_all chain in net_bh.
 *    It is true now, do not change it.
 *    Explanation follows: if protocol handler, mangling packet, will
 *    be the first on list, it is not able to sense, that packet
 *    is cloned and should be copied-on-write, so that it will
 *    change it and subsequent readers will get broken packet.
 *                            --ANK (980803)
 */

static inline struct list_head *ptype_head(const struct packet_type *pt)
{
    if (pt->type == htons(ETH_P_ALL))
        return &ptype_all;
    else
        return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}

/**
 *    dev_add_pack - add packet handler
 *    @pt: packet type declaration
 *
 *    Add a protocol handler to the networking stack. The passed &packet_type
 *    is linked into kernel lists and may not be freed until it has been
 *    removed from the kernel lists.
 *
 *    This call does not sleep therefore it can not
 *    guarantee all CPU's that are in middle of receiving packets
 *    will see the new packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
    struct list_head *head = ptype_head(pt);

    spin_lock(&ptype_lock);
    list_add_rcu(&pt->list, head);
    spin_unlock(&ptype_lock);
}
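For illustration, a hypothetical minimal kernel module could register its own ETH_P_ALL handler through exactly this dev_add_pack() path; this is my own sketch, and the module and function names (my_pkt_rcv, my_ptype) are made up for it:

#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>

/* Called from __netif_receive_skb() for every frame, like packet_rcv. */
static int my_pkt_rcv(struct sk_buff *skb, struct net_device *dev,
                      struct packet_type *pt, struct net_device *orig_dev)
{
    pr_info("snooped %u bytes on %s\n", skb->len, dev->name);
    kfree_skb(skb);    /* deliver_skb() gave us a reference; drop it */
    return 0;
}

static struct packet_type my_ptype = {
    .type = htons(ETH_P_ALL),   /* lands on the ptype_all list */
    .func = my_pkt_rcv,
};

static int __init my_init(void)
{
    dev_add_pack(&my_ptype);
    return 0;
}

static void __exit my_exit(void)
{
    dev_remove_pack(&my_ptype);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");

Every frame passing through __netif_receive_skb() would then also be handed to my_pkt_rcv(), just as it is handed to tcpdump's packet_rcv().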

 

So tcpdump receives packets simply by registering a pseudo-protocol hook on the ptype_all list and letting it process every frame. The life cycle of a capture socket is (a user-space sketch of these steps follows the list):

1. Create the socket; the kernel attaches the hook function via dev_add_pack().
2. In the hook function (packet_rcv), queue the skb on the socket's own receive queue.
3. A recv() system call then dequeues the skb and copies skb->data to user space.
4. Finally, closing the socket removes the hook via dev_remove_pack().
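Here is the user-space side of those four steps, as a minimal sketch (my own illustration, not code from the kernel or tcpdump); error handling is kept to a bare minimum:

#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>

int main(void)
{
    /* Step 1: socket() -> packet_create() -> dev_add_pack() in the kernel.
     * ETH_P_ALL means the hook lands on the ptype_all list. */
    int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (fd < 0) {
        perror("socket (needs CAP_NET_RAW)");
        return 1;
    }

    /* Steps 2-3: packet_rcv() queues frames; recvfrom() copies one out. */
    unsigned char buf[2048];
    struct sockaddr_ll sll;
    socklen_t slen = sizeof(sll);
    ssize_t n = recvfrom(fd, buf, sizeof(buf), 0,
                         (struct sockaddr *)&sll, &slen);
    if (n > 0)
        printf("got %zd bytes on ifindex %d, pkttype %u\n",
               n, sll.sll_ifindex, sll.sll_pkttype);

    /* Step 4: close() -> packet_release() -> dev_remove_pack(). */
    close(fd);
    return 0;
}

On the kernel side, the PF_PACKET address family that this socket() call resolves to is registered at boot by packet_init():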

static int __init packet_init(void)
{
    int rc = proto_register(&packet_proto, 0);

    if (rc != 0)
        goto out;

    sock_register(&packet_family_ops); /* register the create() callback for the PF_PACKET address family */
    register_pernet_subsys(&packet_net_ops);
    register_netdevice_notifier(&packet_netdev_notifier);
out:
    return rc;
}

static const struct proto_ops packet_ops = {
    .family =    PF_PACKET,
    .owner =    THIS_MODULE,
    .release =    packet_release,
    .bind =        packet_bind,
    .connect =    sock_no_connect,
    .socketpair =    sock_no_socketpair,
    .accept =    sock_no_accept,
    .getname =    packet_getname,
    .poll =        packet_poll,
    .ioctl =    packet_ioctl,
    .listen =    sock_no_listen,
    .shutdown =    sock_no_shutdown,
    .setsockopt =    packet_setsockopt,
    .getsockopt =    packet_getsockopt,
    .sendmsg =    packet_sendmsg,
    .recvmsg =    packet_recvmsg,
    .mmap =        packet_mmap,
    .sendpage =    sock_no_sendpage,
};

static const struct net_proto_family packet_family_ops = {
    .family =    PF_PACKET,
    .create =    packet_create,
    .owner    =    THIS_MODULE,
};

Following the socket() system call path, socket(PF_PACKET, ...) ends up in packet_create().

 

A PF_PACKET socket currently has two modes of operation: the SOCK_PACKET mode, and the SOCK_DGRAM/SOCK_RAW mode.

The former is the traditional interface: it copies packets between kernel and user space and exists for compatibility with the old kernel capture API (see below). The latter is its replacement; normally it handles raw link-layer frames through packet_rcv, and it can additionally be configured to exchange data with user space through a shared-memory ring, eliminating the per-packet copy.
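The shared-memory path is configured with the PACKET_RX_RING socket option and then mmap()ed. A minimal user-space sketch (again my own, with an arbitrarily small ring geometry and the default TPACKET_V1 layout assumed):

#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>

int main(void)
{
    int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (fd < 0) { perror("socket"); return 1; }

    /* Describe the ring: 4 blocks of 4 KiB, two 2 KiB frame slots each. */
    struct tpacket_req req = {
        .tp_block_size = 4096,
        .tp_block_nr   = 4,
        .tp_frame_size = 2048,
        .tp_frame_nr   = 8,    /* block_size / frame_size * block_nr */
    };
    if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req)) < 0) {
        perror("PACKET_RX_RING");
        return 1;
    }

    /* Map the ring once; the kernel writes frames directly into it,
     * so no per-packet copy to user space is needed. */
    size_t ring_len = (size_t)req.tp_block_size * req.tp_block_nr;
    void *ring = mmap(NULL, ring_len, PROT_READ | PROT_WRITE,
                      MAP_SHARED, fd, 0);
    if (ring == MAP_FAILED) { perror("mmap"); return 1; }

    /* Each slot starts with a struct tpacket_hdr; a real consumer polls
     * tp_status for TP_STATUS_USER, reads the frame, then hands the slot
     * back by setting TP_STATUS_KERNEL (loop omitted here). */
    struct tpacket_hdr *hdr = ring;
    printf("first slot status: %lu\n", hdr->tp_status);

    munmap(ring, ring_len);
    close(fd);
    return 0;
}

With that context, here is packet_create(), which socket(PF_PACKET, ...) lands in: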

/*
 *    Create a packet of type SOCK_PACKET.
 */

static int packet_create(struct net *net, struct socket *sock, int protocol,
             int kern)
{
    struct sock *sk;
    struct packet_sock *po;
    __be16 proto = (__force __be16)protocol; /* weird, but documented */
    int err;

    if (!capable(CAP_NET_RAW))
        return -EPERM;
    /* type must be SOCK_RAW, SOCK_DGRAM, or the legacy SOCK_PACKET (rarely used on modern kernels) */
    if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
        sock->type != SOCK_PACKET)
        return -ESOCKTNOSUPPORT;

    sock->state = SS_UNCONNECTED;

    err = -ENOBUFS;
    sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
    if (sk == NULL)
        goto out;

    sock->ops = &packet_ops;
    if (sock->type == SOCK_PACKET) /* legacy mode, rarely used */
        sock->ops = &packet_ops_spkt;

    sock_init_data(sock, sk);

    po = pkt_sk(sk);
    sk->sk_family = PF_PACKET;
    po->num = proto;

    sk->sk_destruct = packet_sock_destruct;
    sk_refcnt_debug_inc(sk);

    /*
     *    Attach a protocol block
     */

    spin_lock_init(&po->bind_lock);
    mutex_init(&po->pg_vec_lock);
    po->prot_hook.func = packet_rcv; /* __netif_receive_skb() calls prot_hook.func, i.e. packet_rcv(), for each frame */

    if (sock->type == SOCK_PACKET)
        po->prot_hook.func = packet_rcv_spkt;

    po->prot_hook.af_packet_priv = sk;

    if (proto) {
        po->prot_hook.type = proto;
        register_prot_hook(sk); /* link onto the matching ptype list, walked in __netif_receive_skb() */
    }

    spin_lock_bh(&net->packet.sklist_lock);
    sk_add_node_rcu(sk, &net->packet.sklist);
    sock_prot_inuse_add(net, &packet_proto, 1);
    spin_unlock_bh(&net->packet.sklist_lock);

    return 0;
out:
    return err;
}

From this we can see that packet_rcv() does only light processing of the link-layer frame: it runs the socket's filter, queues the skb on the socket's receive queue, and then wakes up any process waiting on it.

/*
 * This function makes lazy skb cloning in hope that most of packets
 * are discarded by BPF.
 *
 * Note tricky part: we DO mangle shared skb! skb->data, skb->len
 * and skb->cb are mangled. It works because (and until) packets
 * falling here are owned by current CPU. Output packets are cloned
 * by dev_queue_xmit_nit(), input packets are processed by net_bh
 * sequencially, so that if we return skb to original state on exit,
 * we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
              struct packet_type *pt, struct net_device *orig_dev)
{
    struct sock *sk;
    struct sockaddr_ll *sll;
    struct packet_sock *po;
    u8 *skb_head = skb->data;
    int skb_len = skb->len;
    unsigned int snaplen, res;

    if (skb->pkt_type == PACKET_LOOPBACK)
        goto drop;

    sk = pt->af_packet_priv;
    po = pkt_sk(sk);
    printk("skb dev name:%s dev_name:%s ptype:%x\n", skb->dev->name, dev->name, pt->type); /* debug print added for tracing, not in the upstream function */
    if (!net_eq(dev_net(dev), sock_net(sk)))
        goto drop;

    skb->dev = dev;

    if (dev->header_ops) {
        /* The device has an explicit notion of ll header,
         * exported to higher levels.
         *
         * Otherwise, the device hides details of its frame
         * structure, so that corresponding packet head is
         * never delivered to user.
         */
        if (sk->sk_type != SOCK_DGRAM)
            skb_push(skb, skb->data - skb_mac_header(skb));
        else if (skb->pkt_type == PACKET_OUTGOING) {
            /* Special case: outgoing packets have ll header at head */
            skb_pull(skb, skb_network_offset(skb));
        }
    }

    snaplen = skb->len;

    res = run_filter(skb, sk, snaplen); /* run the attached filter; returns how many bytes to keep */
    if (!res)
        goto drop_n_restore;
    if (snaplen > res)
        snaplen = res;

    if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
        goto drop_n_acct;

    if (skb_shared(skb)) {
        struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
        if (nskb == NULL)
            goto drop_n_acct;

        if (skb_head != skb->data) {
            skb->data = skb_head;
            skb->len = skb_len;
        }
        consume_skb(skb);
        skb = nskb;
    }

    BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
             sizeof(skb->cb));
    /* fill in the link-layer address info handed to user space as sockaddr_ll */
    sll = &PACKET_SKB_CB(skb)->sa.ll;
    sll->sll_family = AF_PACKET;
    sll->sll_hatype = dev->type;
    sll->sll_protocol = skb->protocol;
    sll->sll_pkttype = skb->pkt_type;
    if (unlikely(po->origdev))
        sll->sll_ifindex = orig_dev->ifindex;
    else
        sll->sll_ifindex = dev->ifindex;

    sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

    PACKET_SKB_CB(skb)->origlen = skb->len;

    if (pskb_trim(skb, snaplen))
        goto drop_n_acct;

    skb_set_owner_r(skb, sk);
    skb->dev = NULL;
    skb_dst_drop(skb);

    /* drop conntrack reference */
    nf_reset(skb);

    spin_lock(&sk->sk_receive_queue.lock);
    po->stats.tp_packets++;
    skb->dropcount = atomic_read(&sk->sk_drops);
    __skb_queue_tail(&sk->sk_receive_queue, skb); /* queue on the socket's receive queue */
    spin_unlock(&sk->sk_receive_queue.lock);
    sk->sk_data_ready(sk, skb->len); /* wake up any waiting reader */
    return 0;

drop_n_acct:
    spin_lock(&sk->sk_receive_queue.lock);
    po->stats.tp_drops++;
    atomic_inc(&sk->sk_drops);
    spin_unlock(&sk->sk_receive_queue.lock);

drop_n_restore:
    if (skb_head != skb->data && skb_shared(skb)) {
        skb->data = skb_head;
        skb->len = skb_len;
    }
drop:
    consume_skb(skb);
    return 0;
}
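The run_filter() call seen above is where a user-attached classic BPF program runs. As a closing sketch (mine, not from the post), this attaches the classic BPF equivalent of the tcpdump expression "ip" using SO_ATTACH_FILTER; the opcode array is what `tcpdump -dd ip` emits:

#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/filter.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>

int main(void)
{
    int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (fd < 0) { perror("socket"); return 1; }

    /* Classic BPF for "ip": load the ethertype at offset 12,
     * accept (up to 0x40000 bytes) if it is 0x0800, else drop. */
    struct sock_filter code[] = {
        { 0x28, 0, 0, 0x0000000c },  /* ldh [12]              */
        { 0x15, 0, 1, 0x00000800 },  /* jeq #0x800  jt 2 jf 3 */
        { 0x06, 0, 0, 0x00040000 },  /* ret #0x40000          */
        { 0x06, 0, 0, 0x00000000 },  /* ret #0 (drop)         */
    };
    struct sock_fprog prog = {
        .len    = sizeof(code) / sizeof(code[0]),
        .filter = code,
    };

    /* After this, packet_rcv()'s run_filter() drops non-IPv4 frames
     * before they are ever queued on the socket. */
    if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
                   &prog, sizeof(prog)) < 0) {
        perror("SO_ATTACH_FILTER");
        return 1;
    }

    close(fd);
    return 0;
}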

 
