套接字的基本结构(linux)

套接字的基本结构
================
(1) socket()用于在套接字文件系统(sockfs)中创建套接字文件.
套接字文件的操作表为socket_file_ops, 它与文件套接字操作表(proto_ops)相交互,
文件套接字操作表与网络套接字操作表(proto)相交互.

(2) 文件套接字用socket结构描述, 网络套接字用sock结构描述.
文件套接字存在于inode的u.socket_i单元之中并且指向相应的网络套接字.

(3) 套接字的协议族确定了对套接字的创建操作, 套接字的类型确定了对文件套接字的操作,
套接字的协议确定了对网络套接字的操作. 对PF_INET协议族来说,
SOCK_STREAM类型的文件套接字操作表为inet_stream_ops, 网络套接字操作表为tcp_prot.
SOCK_DGRAM类型的文件套接字操作表为inet_dgram_ops, 网络套接字操作表为udp_prot.
SOCK_RAW类型的文件套接字操作表为inet_dgram_ops, 网络套接字操作表为raw_prot.

; net/ipv4/af_inet.c:

/*
 * Protocol-family descriptor for PF_INET: pairs the family number with
 * inet_create(), the routine that builds a new socket of this family.
 */
struct net_proto_family inet_family_ops = { PF_INET, inet_create };

/*
 * inet_create - build the network-level sock behind a new PF_INET socket.
 * @sock:     file-level socket being created; sock->type selects the
 *            proto_ops / proto pair
 * @protocol: requested IP protocol; 0 means "default" for SOCK_STREAM and
 *            SOCK_DGRAM, and is rejected for SOCK_RAW
 *
 * Allocates a zeroed struct sock, picks the operation tables from the
 * socket type, fills in the AF_INET defaults and finally runs the
 * protocol's own init hook when it has one.
 *
 * Returns 0 on success or a negative errno:
 *   -ENOBUFS          sk_alloc() failed
 *   -ESOCKTNOSUPPORT  unsupported socket type (SOCK_SEQPACKET included)
 *   -EPERM            SOCK_RAW requested without CAP_NET_RAW
 *   -EPROTONOSUPPORT  protocol does not fit the socket type, or
 *                     SOCK_RAW with protocol 0
 *   anything else     the value returned by sk->prot->init()
 */
static int inet_create(struct socket *sock, int protocol)
{
	struct sock *sk;
	struct proto *prot;

	sock->state = SS_UNCONNECTED;
	sk = sk_alloc(PF_INET, GFP_KERNEL, 1);
	if (sk == NULL)
		goto do_oom;

	switch (sock->type) {
	case SOCK_STREAM:
		if (protocol && protocol != IPPROTO_TCP)
			goto free_and_noproto;
		protocol = IPPROTO_TCP;
		prot = &tcp_prot;
		sock->ops = &inet_stream_ops;
		break;
	case SOCK_SEQPACKET:
		goto free_and_badtype;
	case SOCK_DGRAM:
		if (protocol && protocol != IPPROTO_UDP)
			goto free_and_noproto;
		protocol = IPPROTO_UDP;
		sk->no_check = UDP_CSUM_DEFAULT;
		prot = &udp_prot;
		sock->ops = &inet_dgram_ops;
		break;
	case SOCK_RAW:
		if (!capable(CAP_NET_RAW))
			goto free_and_badperm;
		if (!protocol)
			goto free_and_noproto;
		prot = &raw_prot;
		sk->reuse = 1;
		sk->num = protocol;
		/* Raw sockets share the datagram file-level ops. */
		sock->ops = &inet_dgram_ops;
		if (protocol == IPPROTO_RAW)
			sk->protinfo.af_inet.hdrincl = 1; /* the user builds the IP header */
		break;
	default:
		goto free_and_badtype;
	}

	if (ipv4_config.no_pmtu_disc)
		sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_DONT;
	else
		sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_WANT;

	/* Generic defaults first; the INET-specific values below override them. */
	sock_init_data(sock, sk);

	/* Replaces the default destructor installed by sock_init_data(). */
	sk->destruct = inet_sock_destruct;

	/* sock_init_data() left zapped set to 1; clear it here. */
	sk->zapped = 0;
	sk->family = PF_INET;
	sk->protocol = protocol;

	sk->prot = prot;
	sk->backlog_rcv = prot->backlog_rcv;

	sk->protinfo.af_inet.ttl = sysctl_ip_default_ttl;

	sk->protinfo.af_inet.mc_loop = 1;
	sk->protinfo.af_inet.mc_ttl = 1;
	sk->protinfo.af_inet.mc_index = 0;
	sk->protinfo.af_inet.mc_list = NULL;

#ifdef INET_REFCNT_DEBUG
	atomic_inc(&inet_sock_nr);
#endif

	/* In this function only the SOCK_RAW case assigns sk->num. */
	if (sk->num) {
		/* It assumes that any protocol which allows
		 * the user to assign a number at socket
		 * creation time automatically
		 * shares.
		 */
		sk->sport = htons(sk->num);

		/* Add to protocol hash chains. */
		sk->prot->hash(sk);
	}

	if (sk->prot->init) {
		int err = sk->prot->init(sk);

		if (err != 0) {
			/* sk is fully set up by now, so release it rather than sk_free() it. */
			inet_sock_release(sk);
			return err;
		}
	}
	return 0;

free_and_badtype:
	sk_free(sk);
	return -ESOCKTNOSUPPORT;

free_and_badperm:
	sk_free(sk);
	return -EPERM;

free_and_noproto:
	sk_free(sk);
	return -EPROTONOSUPPORT;

do_oom:
	return -ENOBUFS;
}

; net/core/sock.c:

/*
 * Initialise the lock embedded in a sock: the spinlock, the user count
 * (reset to 0) and the wait queue head.
 *
 * The expansion deliberately has no trailing semicolon: the caller's own
 * ';' completes the do/while statement, which keeps the macro usable as
 * the body of an un-braced if/else.
 */
#define sock_lock_init(__sk) \
do {	spin_lock_init(&((__sk)->lock.slock)); \
	(__sk)->lock.users = 0; \
	init_waitqueue_head(&((__sk)->lock.wq)); \
} while (0)

/*
 * sk_alloc - take a struct sock from the sk_cachep cache.
 * @family:   protocol family stored in the new sock (only when zero_it is set)
 * @priority: allocation flags handed to kmem_cache_alloc()
 * @zero_it:  non-zero to clear the object, record @family and initialise
 *            its lock; zero to hand back the object exactly as allocated
 *
 * Returns the new sock, or whatever kmem_cache_alloc() returned on
 * failure (a null pointer).
 */
struct sock *sk_alloc(int family, int priority, int zero_it)
{
	struct sock *new_sk;

	new_sk = kmem_cache_alloc(sk_cachep, priority);

	/* Nothing more to do on allocation failure or when the caller
	 * asked for the raw, uninitialised object. */
	if (!new_sk || !zero_it)
		return new_sk;

	memset(new_sk, 0, sizeof(struct sock));
	new_sk->family = family;
	sock_lock_init(new_sk);

	return new_sk;
}

void sock_init_data(struct socket *sock, struct sock *sk)
{
skb_queue_head_init(&sk->receive_queue);
skb_queue_head_init(&sk->write_queue);
skb_queue_head_init(&sk->error_queue);

init_timer(&sk->timer);

sk->allocation = GFP_KERNEL;
sk->rcvbuf = sysctl_rmem_default;
sk->sndbuf = sysctl_wmem_default;
sk->state = TCP_CLOSE;
sk->zapped = 1;
sk->socket = sock;

if(sock)
{
sk->type = sock->type;
sk->sleep = &sock->wait;
sock->sk = sk;
} else
sk->sleep = NULL;

sk->dst_lock = RW_LOCK_UNLOCKED;
sk->callback_lock = RW_LOCK_UNLOCKED;

sk->state_change = sock_def_wakeup;
sk->data_ready = sock_def_readable;
sk->write_space = sock_def_write_space;
sk->error_report = sock_def_error_report;
sk->destruct            =       sock_def_destruct;

sk->peercred.pid = 0;
sk->peercred.uid = -1;
sk->peercred.gid = -1;
sk->rcvlowat = 1;
sk->rcvtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sndtimeo = MAX_SCHEDULE_TIMEOUT;

atomic_set(&sk->refcnt, 1);
}

posted on 2005-11-30 15:47  Peter.zhou  阅读(649)  评论(0)    收藏  举报

导航