内核中的TCP的追踪分析-8-TCP(IPV4)的socket的地址绑定

来源:百度文库 编辑:神马文学网 时间:2024/10/02 16:14:02
内核中的TCP的追踪分析-8-TCP(IPV4)的socket的地址绑定--续1
接上一篇,我是无名小卒,请转载的朋友注明出处。这个函数在/net/ipv4/af_inet.c中的449行处

 

sys_socketcall()-->sys_bind()-->inet_bind()

/*
 * inet_bind - bind an IPv4 socket to the local address/port given in uaddr.
 *
 * @sock:     socket being bound; its struct sock is taken from sock->sk
 * @uaddr:    generic socket address, interpreted here as struct sockaddr_in
 * @addr_len: length of the address supplied by the caller
 *
 * Returns 0 on success, otherwise a negative errno:
 *   -EINVAL        addr_len is shorter than struct sockaddr_in, or the
 *                  socket is not in TCP_CLOSE state / already has a local
 *                  port (inet->num != 0)
 *   -EADDRNOTAVAIL the address fails the non-local bind check below
 *   -EACCES        a non-zero port below PROT_SOCK was requested without
 *                  CAP_NET_BIND_SERVICE
 *   -EADDRINUSE    the protocol's get_port() hook reported failure
 *
 * If the protocol supplies its own bind hook, that hook's return value is
 * passed through unchanged and none of the checks in this function run.
 */
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
    struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
    struct sock *sk = sock->sk;
    struct inet_sock *inet = inet_sk(sk);
    unsigned short snum;
    int chk_addr_ret;
    int err;

    /* If the socket has its own bind function then use it. (RAW) */
    if (sk->sk_prot->bind) {
        err = sk->sk_prot->bind(sk, uaddr, addr_len);
        goto out;
    }
    /*
     * NOTE(review): addr_len is an int while sizeof yields an unsigned
     * type, so a negative addr_len is converted to a large unsigned value
     * and would pass this check. Presumably the caller has already
     * rejected negative lengths - confirm.
     */
    err = -EINVAL;
    if (addr_len < sizeof(struct sockaddr_in))
        goto out;

    /* Classify the requested address (an RTN_* value) using this socket's struct net. */
    chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);

    /* Not specified by any standard per-se, however it breaks too
     * many applications when removed. It is unfortunate since
     * allowing applications to make a non-local bind solves
     * several problems with systems using dynamic addressing.
     * (ie. your servers still start up even if your ISDN link
     * is temporarily down qinjiana0786@163.com)
     */
    err = -EADDRNOTAVAIL;
    if (!sysctl_ip_nonlocal_bind &&
     !inet->freebind &&
     addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
     chk_addr_ret != RTN_LOCAL &&
     chk_addr_ret != RTN_MULTICAST &&
     chk_addr_ret != RTN_BROADCAST)
        goto out;

    /* Requested port in host byte order; a zero port skips the privilege check. */
    snum = ntohs(addr->sin_port);
    err = -EACCES;
    if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
        goto out;

    /* We keep a pair of addresses. rcv_saddr is the one
     * used by hash lookups, and saddr is used for transmit.
     *
     * In the BSD API these are the same except where it
     * would be illegal to use them (multicast/broadcast) in
     * which case the sending device address is used.
     */
    lock_sock(sk);

    /* Check these errors (active socket, double bind). */
    err = -EINVAL;
    if (sk->sk_state != TCP_CLOSE || inet->num)
        goto out_release_sock;

    inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
    if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
        inet->saddr = 0; /* Use device qinjiana0786@163.com*/

    /* Make sure we are allowed to bind here. wumingxiaozu*/
    if (sk->sk_prot->get_port(sk, snum)) {
        /* Undo the address assignment made above before reporting failure. */
        inet->saddr = inet->rcv_saddr = 0;
        err = -EADDRINUSE;
        goto out_release_sock;
    }

    /* Record which parts of the binding were explicitly requested. */
    if (inet->rcv_saddr)
        sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
    if (snum)
        sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
    /* inet->num is presumably filled in by get_port() - TODO confirm; stored here in network byte order. */
    inet->sport = htons(inet->num);
    /* A freshly bound socket has no peer: clear destination address/port, then call sk_dst_reset(). */
    inet->daddr = 0;
    inet->dport = 0;
    sk_dst_reset(sk);
    err = 0;
    /* Every path that took lock_sock() leaves through here so the lock is released. */
out_release_sock:
    release_sock(sk);
out:
    return err;
}

可能函数看起来比较长,贴出完整的代码是为了方便朋友们在下面阅读时随时对照,但是请朋友们看完我的分析之后自己再单独阅读一下上面的代码,这样有利于提高阅读代码的能力,同时也能加深对函数的理解。在这里我们首先看到再次将通用的地址结构sockaddr转换成我们IP中使用的地址结构sockaddr_in,这个结构已经在上面看过了。并且从socket中取出sock结构,再转换成我们TCP/IP使用的专用结构inet_sock。接着我们看到使用sock中的sk_prot钩子结构来调用bind,而sk_prot则在上一节创建socket时我们说到了它是如何挂钩的,这里直接进入挂入的tcp_prot结构变量,它是运输层的结构体,但是我们在那里没有看到它挂入bind钩子函数,所以if (sk->sk_prot->bind) 的条件不成立,这段代码被跳过。接下来函数检测一下地址长度是否正确,此后进入inet_addr_type来检查地址的类型

 

sys_socketcall()-->sys_bind()-->inet_bind()-->inet_addr_type()

/*
 * inet_addr_type - classify an IPv4 address without tying the result to a
 * particular network device.
 *
 * @net:  struct net passed through to the lookup
 * @addr: IPv4 address, as taken from sin_addr.s_addr by inet_bind()
 *
 * Thin wrapper: forwards to __inet_dev_addr_type() with a NULL device and
 * returns its result unchanged.
 */
unsigned int inet_addr_type(struct net *net, __be32 addr)
{
    return __inet_dev_addr_type(net, NULL, addr);
}

/*
 * __inet_dev_addr_type - classify an IPv4 address as an RTN_* type.
 *
 * @net:  struct net whose RT_TABLE_LOCAL table is consulted
 * @dev:  optional device filter; NULL accepts a match on any device
 * @addr: IPv4 address to classify
 *
 * Result:
 *   RTN_BROADCAST  for zeronet / limited-broadcast addresses, and also
 *                  when no local table can be obtained
 *   RTN_MULTICAST  for multicast addresses
 *   res.type       when the local-table lookup succeeds and either dev is
 *                  NULL or it equals the device of the matched entry
 *   RTN_UNICAST    in every remaining case
 */
static inline unsigned __inet_dev_addr_type(struct net *net,
                     const struct net_device *dev,
                     __be32 addr)
{
    struct flowi        fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
    struct fib_result    res;
    struct fib_table *local_table;
    unsigned type;

    /* Addresses that can be classified without touching the routing table. */
    if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
        return RTN_BROADCAST;
    if (ipv4_is_multicast(addr))
        return RTN_MULTICAST;

#ifdef CONFIG_IP_MULTIPLE_TABLES
    res.r = NULL;
#endif

    local_table = fib_get_table(net, RT_TABLE_LOCAL);
    if (!local_table)
        return RTN_BROADCAST;

    /* A non-zero return from tb_lookup() is treated as "no entry found". */
    if (local_table->tb_lookup(local_table, &fl, &res))
        return RTN_UNICAST;

    /* Read the type before the lookup result is handed back via fib_res_put(). */
    type = (!dev || dev == res.fi->fib_dev) ? res.type : RTN_UNICAST;
    fib_res_put(&res);
    return type;
}

在上面函数中有几个数据结构,首先是struct flowi结构,它用作路由查询的键值。我们看到它内部的nl_u是一个联合,联合内部有ip4_u、ip6_u以及dn_u三个结构体。所以上面的struct flowi      fl = { .nl_u = { .ip4_u = { .daddr = addr } } };用我们在练习中的inet_addr("192.168.1.1")地址初始化了这个路由键值结构变量fl

 

server_address.sin_addr.s_addr = inet_addr("192.168.1.1");

这是从inet_bind()在调用inet_addr_type()时通过addr->sin_addr.s_addr传递下来的

struct fib_result是返回路由查询结果用的,而struct fib_table则是路由表的结构体。函数中首先检查地址是否为零网络地址、受限广播地址或者多播地址,如果是就直接返回相应的类型(RTN_BROADCAST或RTN_MULTICAST)。接着我们要结合参数net来分析,但是struct net我们还没有看过,我们看到上面传递下来的是sock_net(sk)

 

sys_socketcall()-->sys_bind()-->inet_bind()-->sock_net()

/*
 * sock_net - return the struct net associated with a socket.
 *
 * With CONFIG_NET_NS the value stored in sk->sk_net is returned; without
 * it sk is not consulted and the address of the global init_net is
 * returned for every socket.
 */
static inline struct net *sock_net(const struct sock *sk)
{
#ifdef CONFIG_NET_NS
    return sk->sk_net;
#else
    return &init_net;
#endif
}

struct net结构非常大,它是专门用于网络命名空间的数据结构。我们这里不列出了,只需要知道上面的sock_net是用来取得我们所使用的网络命名空间结构的。我们在分配sock结构时(见上一节中)在sk_alloc函数中曾经调用了sock_net_set对sk_net进行挂入操作

 

sys_socketcall()-->sys_socket()-->sock_create()-->__sock_create()-->通过pf->create()--> inet_create()-->sk_alloc() -->sock_net_set()

/*
 * sock_net_set - record the struct net a socket belongs to.
 *
 * Stores net in sk->sk_net when CONFIG_NET_NS is enabled. Without that
 * option the body is empty and both arguments are unused.
 */
static inline void sock_net_set(struct sock *sk, struct net *net)
{
#ifdef CONFIG_NET_NS
    sk->sk_net = net;
#endif
}

而层层传递下来的net参数,则是在创建socket时由sock_create()从当前进程中取得并传入的

 

sys_socketcall()-->sys_socket()-->sock_create()

/*
 * sock_create - create a socket using the calling task's struct net.
 *
 * Forwards family, type, protocol and res to __sock_create(), supplying
 * current->nsproxy->net_ns as the struct net and 0 as the final argument,
 * and returns whatever __sock_create() returns.
 *
 * NOTE(review): the meaning of the trailing 0 is not visible here -
 * presumably a "kernel-internal socket" flag; confirm against
 * __sock_create().
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
    return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
}

这个current则是当前进程的task_struct结构,其内部有一个指向命名空间代理结构nsproxy的指针,我们在前边看到过

/* namespaces */

         struct nsproxy *nsproxy;

这个命名空间结构中则封装着系统进程所有的共享的命名空间

/*
 * struct nsproxy - the set of namespaces used by a task.
 *
 * Reached through task_struct->nsproxy; sock_create() reads net_ns from it
 * to choose the struct net for a new socket.
 */
struct nsproxy {
    atomic_t count;                    /* presumably a reference count for sharing between tasks - TODO confirm */
    struct uts_namespace *uts_ns;      /* UTS namespace */
    struct ipc_namespace *ipc_ns;      /* IPC namespace */
    struct mnt_namespace *mnt_ns;      /* mount namespace */
    struct pid_namespace *pid_ns;      /* PID namespace */
    struct user_namespace *user_ns;    /* user namespace */
    struct net      *net_ns;           /* network namespace; handed to __sock_create() by sock_create() */
};

在内核中CONFIG_NET_NS配置选项是为了让用户自定义自己的网络空间结构,即上面的net结构,可以看出2.6.26内核的灵活性,但是我们一般在内核中不会配置该项,所以这里应该是取得init_net,这个结构是什么时间被初始化的呢?明天继续本文。