/* 学习笔记: AF_UNIX SOCK_STREAM 数据路径跟踪 (带注释的内核源码摘录). */
/*
unix_socket, SOCK_STREAM 传输.
client: socket() => connect() => write()
server: socket() => bind() => listen() => accept() => read()
socket()
__do_sys_socket()
server 创建socket client 创建socket.
创建struct socket *socket;
创建struct sock *sock;
创建struct file *newfile, newfile->private_data 指向 socket.
分配fd, 关联fd与newfile.
形成 fd-> newfile -> socket的关系.
将fd返回给用户.
bind()
__do_sys_bind()
这里的sock 为server 执行__do_sys_socket()创建的 struct sock *sock
使用用户传入的 socket文件路径名 创建 文件dentry. 将 sock 转换为 struct unix_sock *u_sock,
设置 u_sock->path.dentry 为 socket文件的dentry.
将sock 加入struct hlist_head bsd_socket_buckets[]数组.
listen()
__do_sys_listen()
这里的sock 为server 执行 __do_sys_socket()创建的 struct sock *sock
将sock->sk_state 设置为 TCP_LISTEN.
accept()
__do_sys_accept()
这里的sock 为server 执行__do_sys_socket()创建的 struct sock *sock
创建struct socket *new_socket, struct file *newfile.
sock->sk_receive_queue 队列内, 有__do_sys_connect()函数新加入的skb.
将skb->sk 取出, 获得__do_sys_connect()函数的new_sock, 这样server和client都能访问new_sock.
后续将使用new_sock->sk_receive_queue队列来进行 server和client间的skb传输.
创建 struct file *s_new_file. 分配新的fd. 关联fd与s_new_file.
s_new_file->private_data = new_socket. new_socket->sk = new_sock.
将新分配的fd返回给用户.
connect()
__do_sys_connect
根据用户传入的socket文件的路径名, 找到对应文件在sockfs中的dentry.
遍历bsd_socket_buckets[] 数组内存储的struct sock *sock, 将sock转换为struct unix_sock *u_sock.
匹配u_sock->path.dentry 与用户传入的socket文件的路径的dentry, 找到dentry相同的sock.
这里找到的sock, 就是server在__do_sys_socket()函数内创建的.
设置 u_sock->peer = sock.
创建 struct sk_buff *skb, 创建 struct sock *new_sock, skb->sk = new_sock, 建立skb与new_sock的关联.
将skb加入 server的sock->sk_receive_queue 队列内.
write()
sock_write_iter
client 执行写操作. 传入client执行socket()函数获取的fd.
fd => file.private_data => socket->sk => sock => u_sock->peer => new_sock.
通过fd 最终获得new_sock. 这里的new_sock 即为__do_sys_connect()函数内创建的, 通过skb传给server的.
创建 struct sk_buff *c_skb, 将用户传入的buffer复制到 c_skb->data. 将c_skb 加入 new_sock->sk_receive_queue队列.
read()
sock_read_iter
server 执行读操作. __do_sys_accept()函数返回的fd 传入sock_read_iter()读函数.
fd => s_new_file->private_data => new_socket->sk => new_sock.
从fd 最终获得new_sock, 从new_sock->sk_receive_queue队列内获得 sock_write_iter()新加入的skb,
将skb->data 内容复制 给用户传入的buffer内.
*/
net_namespace_list <list1>
struct net_generic *ng; <tag1>
struct net init_net;
-> list <tag2 add2tail list2>
===================app================
---------server ------------
struct socket *sock;<tag3> <obj4>
->short type; <tag4> <"用户输入参数"> <obj2>
->const struct proto_ops *ops; <tag5>
->struct sock *sk; <tag7> <point2 obj1>
->struct file *file; <tag10> <point2 obj3>
struct sock *sk; <obj1> <tag6>
->sk_type; <tag8> <cp from obj2>
->sk_state = TCP_CLOSE; <tag44>
= TCP_LISTEN <tag15>
->struct sk_buff_head sk_receive_queue <list3>
->sk_bind_node; <tag65> <add2head list9>
->struct socket *sk_socket <tag76> <point2 obj4>
struct unix_sock ->path.dentry <tag73> <"将使用 socket名创建的文件的dentry, 保存到这里.">
struct file *newfile; <tag9> <obj3>
->private_data; <tag11> <point2 obj4>
struct sockaddr_storage address; <tag12> <cp from user func bind(xx,addr,xx)> <obj5>
struct unix_address *addr; <tag13>
->struct sockaddr_un name[]; <tag14> <cp from obj5>
->char sun_path[UNIX_PATH_MAX]; <"path name, socket文件的名字">
struct socket *newsock; <tag16> <obj7>
->type; <tag17> <cp from obj2>
->struct file *file; <tag19> <point2 obj6>
->struct sock *sk; <tag25> <point2 obj10>
struct file *newfile_accept; <tag18> <obj6>
->private_data <tag20> <point2 obj7>
struct sk_buff *skb; <tag21> <get from list3> <same to obj40>
->struct sock *sk; <obj9> <same to obj19>
struct sock *tsk; <tag23> <point2 obj9> <obj10>
->sk_receive_queue <list8>
struct msghdr msg;
->struct iov_iter msg_iter; <tag57> <cp from params> <"从内核获取数据,返回给user"> <tag59> <cp data from obj31>
->struct kiocb *msg_iocb; <tag57>
struct sk_buff *get_skb; <tag58> <get skb from list8> <obj31>
bsd_socket_buckets[] <list9>
-------end server --------
-------client-------------
struct socket *client_sock; <tag27><obj14>
->type <tag28> <cp from user>
->struct sock *sk; <tag30> <point2 obj11>
->struct file *file; <tag32> <point2 obj13>
struct sock *client_sk; <tag29> <obj11>
->state = SS_CONNECTED; <tag46> <"注: state/SS_CONNECTED 实为 struct socket 的成员, 非 struct sock — 待核实">
->sk_state = TCP_CLOSE; <tag44>
= TCP_ESTABLISHED; <tag45> <"ESTABLISHED 已建立">
struct unix_sock ->peer; <tag72> <point2 obj19>
struct file *client_newfile; <tag31> <obj13>
->private_data <tag33> <point2 obj14>
struct sock *newsk <tag35> <obj19>
->sk_state = TCP_ESTABLISHED; <tag71>
struct sk_buff *skb <tag36> <obj40> <tag70 add2 list11>
->unsigned char *head <tag38> <point2 obj16>
->unsigned char *data <tag39> <point2 obj16>
->struct sock *sk; <tag40> <point2 obj19>
u8 *data; <tag37> <obj16>
struct sockaddr_storage client_address; <tag47> <cp from user connect(xxx,addr,xxx)> <obj21>
struct sockaddr_un *sunaddr; <tag41> <point2 obj21>
struct sock *path_sk; <tag43> <same to obj1> <" server 传来的">
->sk_receive_queue; <list11>
struct sock *write_other <tag48> <get from obj11> <point2 obj19>
->sk_receive_queue <list5>
/*
* <tag48> :
* struct sock *sk 2 struct unix_sock *usk;
* other = usk->peer;
*
* struct unix_sock {
* struct sock sk;
* .....
* struct sock *peer;
*
* }
* 即usk 完全包含sk, 因此分配sk内存时, 应该分大一些.
*
* */
struct sk_buff *client_skb; <tag49> <tag55 add2tail list5>
->data <tag52> <cp from obj15>
struct msghdr msg;
->struct iov_iter msg_iter; <tag50> <copy from param> <obj15> <"从 user复制数据到kernel">
->struct kiocb *msg_iocb <tag50>
-------end client---------
===============code==============
void __init net_ns_init(void)
{
pr_err("%s:in %d.\n",__func__,__LINE__);
struct net_generic *ng;
ng = net_alloc_generic(); <tag1>
static struct net_generic *net_alloc_generic(void)
{
struct net_generic *ng;
unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
ng = kzalloc(generic_size, GFP_KERNEL);
if (ng)
ng->s.len = max_gen_ptrs;
return ng;
}
rcu_assign_pointer(init_net.gen, ng);
down_write(&pernet_ops_rwsem);
setup_net(&init_net, &init_user_ns);
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
/* Must be called with pernet_ops_rwsem held */
const struct pernet_operations *ops, *saved_ops;
int error = 0;
LIST_HEAD(net_exit_list);
net->dev_base_seq = 1;
net->user_ns = user_ns;
spin_lock_init(&net->nsid_lock);
mutex_init(&net->ipv4.ra_mutex);
down_write(&net_rwsem);
list_add_tail_rcu(&net->list, &net_namespace_list);
up_write(&net_rwsem);
return error;
}
init_net_initialized = true;
up_write(&pernet_ops_rwsem);
}
int register_pernet_subsys(struct pernet_operations *ops)
{
pr_err("%s:in %d.\n",__func__,__LINE__);
int error;
down_write(&pernet_ops_rwsem);
error = register_pernet_operations(first_device, ops);
static int register_pernet_operations(struct list_head *list,
struct pernet_operations *ops)
{
int error;
if (ops->id) {
pr_err("%s:in %d.\n",__func__,__LINE__);
error = ida_alloc_min(&net_generic_ids, MIN_PERNET_OPS_ID,
GFP_KERNEL);
*ops->id = error;
max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
}
error = __register_pernet_operations(list, ops);
static int __register_pernet_operations(struct list_head *list,
struct pernet_operations *ops)
{
struct net *net;
int error;
LIST_HEAD(net_exit_list);
list_add_tail(&ops->list, list);
if (ops->init || (ops->id && ops->size)) {
/* We held write locked pernet_ops_rwsem, and parallel
* setup_net() and cleanup_net() are not possible.
*/
for_each_net(net) {
error = ops_init(ops, net);
static int ops_init(const struct pernet_operations *ops, struct net *net)
{
pr_err("%s:in %d.\n",__func__,__LINE__);
struct net_generic *ng;
int err = -ENOMEM;
void *data = NULL;
if (ops->id && ops->size) { //
data = kzalloc(ops->size, GFP_KERNEL);
err = net_assign_generic(net, *ops->id, data);
}
err = 0;
if (ops->init) { //
err = ops->init(net);
}
if (!err) { //
return 0;
}
return err;
}
list_add_tail(&net->exit_list, &net_exit_list);
}
}
return 0;
}
return error;
}
up_write(&pernet_ops_rwsem);
return error;
}
/*
 * copy_net_ns — trace excerpt: obtain the network namespace for a new task.
 * Only the !CLONE_NEWNET fast path survived the trimming of these notes:
 * it just takes a reference on the caller's existing namespace.
 * NOTE(review): if CLONE_NEWNET is set, this excerpt returns `net`
 * uninitialized — the allocation/setup body was elided from the notes,
 * not missing from the real kernel source.
 */
struct net *copy_net_ns(unsigned long flags,
struct user_namespace *user_ns, struct net *old_net)
{
pr_err("%s:in %d.\n",__func__,__LINE__);
struct ucounts *ucounts;
struct net *net;
int rv;
if (!(flags & CLONE_NEWNET)) { // no new netns requested: reuse old_net
return get_net(old_net);
}
return net;
}
static int __init sock_init(void)
{
int err;
/*
* Initialize the network sysctl infrastructure.
*/
err = net_sysctl_init();
__init int net_sysctl_init(void)
{
static struct ctl_table empty[1];
int ret = -ENOMEM;
/* Avoid limitations in the sysctl implementation by
* registering "/proc/sys/net" as an empty directory not in a
* network namespace.
*/
ret = register_pernet_subsys(&sysctl_pernet_ops);
return ret;
}
/*
* Initialize skbuff SLAB cache
*/
skb_init();
void __init skb_init(void)
{
pr_err("%s:in %d.\n",__func__,__LINE__);
skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache",
sizeof(struct sk_buff),
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
offsetof(struct sk_buff, cb),
sizeof_field(struct sk_buff, cb),
NULL);
skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
sizeof(struct sk_buff_fclones),
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
/* usercopy should only access first SKB_SMALL_HEAD_HEADROOM bytes.
* struct skb_shared_info is located at the end of skb->head,
* and should not be copied to/from user.
*/
skb_small_head_cache = kmem_cache_create_usercopy("skbuff_small_head",
SKB_SMALL_HEAD_CACHE_SIZE,
0,
SLAB_HWCACHE_ALIGN | SLAB_PANIC,
0,
SKB_SMALL_HEAD_HEADROOM,
NULL);
}
/*
* Initialize the protocols module.
*/
init_inodecache();
static void init_inodecache(void)
{
pr_err("%s:in %d.\n",__func__,__LINE__);
sock_inode_cachep = kmem_cache_create("sock_inode_cache",
sizeof(struct socket_alloc),
0,
(SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
SLAB_MEM_SPREAD | SLAB_ACCOUNT),
init_once);
}
sock_mnt = kern_mount(&sock_fs_type);
static int sockfs_init_fs_context(struct fs_context *fc)
{
pr_err("%s:in %d.\n",__func__,__LINE__);
struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
ctx->ops = &sockfs_ops;
ctx->dops = &sockfs_dentry_operations;
ctx->xattr = sockfs_xattr_handlers;
return 0;
}
return err;
}
sock_alloc_inode
/*
 * net_inuse_init — trace excerpt: boot-time registration of the per-netns
 * "protocol inuse" counters via register_pernet_subsys(). Failure here is
 * fatal (panic), matching the kernel's treatment of core init failures.
 */
static __init int net_inuse_init(void)
{
pr_err("%s:in %d.\n",__func__,__LINE__);
if (register_pernet_subsys(&net_inuse_ops))
panic("Cannot initialize net inuse counters");
return 0;
}
/*
 * init_default_flow_dissectors — trace excerpt: flow-dissector setup.
 * The real body was elided from these notes; only the trace pr_err()
 * marker is kept to show the function runs during boot.
 */
static int __init init_default_flow_dissectors(void)
{
pr_err("%s:in %d.\n",__func__,__LINE__);
return 0;
}
static int __init netlink_proto_init(void)
{
int i;
int err = proto_register(&netlink_proto, 0);
nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
for (i = 0; i < MAX_LINKS; i++) {
rhashtable_init(&nl_table[i].hash, &netlink_rhashtable_params);
}
/* The netlink device handler may be needed early. */
rtnetlink_init();
void __init rtnetlink_init(void)
{
register_pernet_subsys(&rtnetlink_net_ops);
int register_pernet_subsys(struct pernet_operations *ops)
{
int error;
down_write(&pernet_ops_rwsem);
error = register_pernet_operations(first_device, ops);
up_write(&pernet_ops_rwsem);
return error;
}
}
return err;
}
static int __net_init rtnetlink_net_init(struct net *net)
{
struct sock *sk;
struct netlink_kernel_cfg cfg = {
.groups = RTNLGRP_MAX,
.input = rtnetlink_rcv,
.cb_mutex = &rtnl_mutex,
.flags = NL_CFG_F_NONROOT_RECV,
.bind = rtnetlink_bind,
};
sk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg);
static inline struct sock *
netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
{
return __netlink_kernel_create(net, unit, THIS_MODULE, cfg);
struct sock *
__netlink_kernel_create(struct net *net, int unit, struct module *module,
struct netlink_kernel_cfg *cfg)
{
pr_err("%s:in %d.\n",__func__,__LINE__);
struct socket *sock;
struct sock *sk;
struct netlink_sock *nlk;
struct listeners *listeners = NULL;
struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
unsigned int groups;
sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock);
int sock_create_lite(int family, int type, int protocol, struct socket **res)
{
int err;
struct socket *sock = NULL;
sock = sock_alloc();
sock->type = type;
*res = sock;
return err;
}
__netlink_create(net, sock, cb_mutex, unit, 1);
static int __netlink_create(struct net *net, struct socket *sock,
struct mutex *cb_mutex, int protocol,
int kern)
{
struct sock *sk;
struct netlink_sock *nlk;
sock->ops = &netlink_ops;
sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern);
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
struct proto *prot, int kern)
{
struct sock *sk;
sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
int family)
{
struct sock *sk;
struct kmem_cache *slab;
slab = prot->slab;
if (slab != NULL) {
sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
if (want_init_on_alloc(priority)) {
sk_prot_clear_nulls(sk, prot->obj_size);
}
} else {
sk = kmalloc(prot->obj_size, priority);
}
return sk;
}
if (sk) {
sk->sk_family = family;
/*
* See comment in struct sock definition to understand
* why we need sk_prot_creator -acme
*/
sk->sk_prot = sk->sk_prot_creator = prot;
sk->sk_kern_sock = kern;
sock_lock_init(sk);
sock_net_set(sk, net);
static inline
void sock_net_set(struct sock *sk, struct net *net)
{
write_pnet(&sk->sk_net, net);
static inline void write_pnet(possible_net_t *pnet, struct net *net)
{
pnet->net = net;
}
}
}
return sk;
}
sock_init_data(sock, sk);
void sock_init_data(struct socket *sock, struct sock *sk)
{
kuid_t uid = sock ?
SOCK_INODE(sock)->i_uid :
make_kuid(sock_net(sk)->user_ns, 0);
sock_init_data_uid(sock, sk, uid);
}
sk->sk_destruct = netlink_sock_destruct;
sk->sk_protocol = protocol;
return 0;
}
sk = sock->sk;
sk->sk_data_ready = netlink_data_ready;
nlk = nlk_sk(sk);
static inline struct netlink_sock *nlk_sk(struct sock *sk)
{
return container_of(sk, struct netlink_sock, sk);
}
nlk->flags |= NETLINK_F_KERNEL_SOCKET;
return sk;
}
}
net->rtnl = sk;
return 0;
}
static int __init genl_init(void)
{
int err;
err = register_pernet_subsys(&genl_pernet_ops);
int register_pernet_subsys(struct pernet_operations *ops)
{
pr_err("%s:in %d.\n",__func__,__LINE__);
int error;
down_write(&pernet_ops_rwsem);
error = register_pernet_operations(first_device, ops);
up_write(&pernet_ops_rwsem);
return error;
}
return 0;
}
static int __net_init genl_pernet_init(struct net *net)
{
pr_err("%s:in %d.\n",__func__,__LINE__);
struct netlink_kernel_cfg cfg = {
.input = genl_rcv,
.flags = NL_CFG_F_NONROOT_RECV,
.bind = genl_bind,
};
/* we'll bump the group number right afterwards */
net->genl_sock = netlink_kernel_create(net, NETLINK_GENERIC, &cfg);
static inline struct sock *
netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
{
return __netlink_kernel_create(net, unit, THIS_MODULE, cfg);
}
return 0;
}
============================ app ================
__do_sys_socket
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
return __sys_socket(family, type, protocol);
int __sys_socket(int family, int type, int protocol)
{
pr_err("%s:in %d.\n",__func__,__LINE__);
struct socket *sock;
int flags;
sock = __sys_socket_create(family, type, protocol);
static struct socket *__sys_socket_create(int family, int type, int protocol)
{
struct socket *sock;
int retval;
type &= SOCK_TYPE_MASK;
retval = sock_create(family, type, protocol, &sock);
int sock_create(int family, int type, int protocol, struct socket **res)
{
return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
int __sock_create(struct net *net, int family, int type, int protocol,
struct socket **res, int kern)
{
int err;
struct socket *sock;
const struct net_proto_family *pf;
sock = sock_alloc(); <tag3> <tag27>
sock->type = type; <tag4> <tag28>
rcu_read_lock();
pf = rcu_dereference(net_families[family]);
err = -EAFNOSUPPORT;
try_module_get(pf->owner);
/* Now protected by module ref count */
rcu_read_unlock();
/* unix_create */
err = pf->create(net, sock, protocol, kern);
static int unix_create(struct net *net, struct socket *sock, int protocol, int kern)
{
struct sock *sk;
sock->state = SS_UNCONNECTED;
switch (sock->type) {
case SOCK_STREAM:
sock->ops = &unix_stream_ops; <tag5>
break;
}
sk = unix_create1(net, sock, kern, sock->type);
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
{
pr_err("%s:in %d.\n",__func__,__LINE__);
struct unix_sock *u;
struct sock *sk;
int err;
atomic_long_inc(&unix_nr_socks);
atomic_long_read(&unix_nr_socks);
if (type == SOCK_STREAM)
sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern); <tag6><tag29>
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
struct proto *prot, int kern)
{
struct sock *sk;
sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
int family)
{
struct sock *sk;
struct kmem_cache *slab;
slab = prot->slab;
if (slab != NULL) {
sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
if (want_init_on_alloc(priority)) {
sk_prot_clear_nulls(sk, prot->obj_size);
}
}
return sk;
}
if (sk) {
sk->sk_family = family;
/*
* See comment in struct sock definition to understand
* why we need sk_prot_creator -acme
*/
sk->sk_prot = sk->sk_prot_creator = prot;
sk->sk_kern_sock = kern;
sock_lock_init(sk);
sock_net_set(sk, net);
void sock_net_set(struct sock *sk, struct net *net)
{
write_pnet(&sk->sk_net, net);
static inline void write_pnet(possible_net_t *pnet, struct net *net)
{
pnet->net = net;
}
}
}
return sk;
}
sock_init_data(sock, sk);
void sock_init_data(struct socket *sock, struct sock *sk)
{
kuid_t uid = sock ?
SOCK_INODE(sock)->i_uid :
make_kuid(sock_net(sk)->user_ns, 0);
sock_init_data_uid(sock, sk, uid);
void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
{
sk_init_common(sk);
sk->sk_send_head = NULL;
timer_setup(&sk->sk_timer, NULL, 0);
sk->sk_allocation = GFP_KERNEL;
sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default);
sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
sk->sk_state = TCP_CLOSE;<tag44>
sk->sk_use_task_frag = true;
sk_set_socket(sk, sock);
static inline void sk_set_socket(struct sock *sk, struct socket *sock)
{
sk->sk_socket = sock; <tag76>
}
sock_set_flag(sk, SOCK_ZAPPED);
static inline void sock_set_flag(struct sock *sk, enum sock_flags flag)
{
__set_bit(flag, &sk->sk_flags);
}
if (sock) {
sk->sk_type = sock->type; <tag8>
RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
sock->sk = sk; <tag7> <tag30>
}
sk->sk_uid = uid;
rwlock_init(&sk->sk_callback_lock);
if (sk->sk_kern_sock) {
lockdep_set_class_and_name(
&sk->sk_callback_lock,
af_kern_callback_keys + sk->sk_family,
af_family_kern_clock_key_strings[sk->sk_family]);
}
sk->sk_state_change = sock_def_wakeup;
sk->sk_data_ready = sock_def_readable;
sk->sk_write_space = sock_def_write_space;
sk->sk_error_report = sock_def_error_report;
sk->sk_destruct = sock_def_destruct;
sk->sk_frag.page = NULL;
sk->sk_frag.offset = 0;
sk->sk_peek_off = -1;
sk->sk_peer_pid = NULL;
sk->sk_peer_cred = NULL;
spin_lock_init(&sk->sk_peer_lock);
sk->sk_write_pending = 0;
sk->sk_rcvlowat = 1;
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_stamp = SK_DEFAULT_STAMP;
atomic_set(&sk->sk_zckey, 0);
sk->sk_napi_id = 0;
sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read);
sk->sk_max_pacing_rate = ~0UL;
sk->sk_pacing_rate = ~0UL;
WRITE_ONCE(sk->sk_pacing_shift, 10);
sk->sk_incoming_cpu = -1;
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.rst for details)
*/
smp_wmb();
refcount_set(&sk->sk_refcnt, 1);
atomic_set(&sk->sk_drops, 0);
}
}
sk->sk_hash = unix_unbound_hash(sk);
sk->sk_allocation = GFP_KERNEL_ACCOUNT;
sk->sk_write_space = unix_write_space;
sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
sk->sk_destruct = unix_sock_destructor;
u = unix_sk(sk);
u->path.dentry = NULL;
u->path.mnt = NULL;
spin_lock_init(&u->lock);
atomic_long_set(&u->inflight, 0);
INIT_LIST_HEAD(&u->link);
mutex_init(&u->iolock); /* single task reading lock */
mutex_init(&u->bindlock); /* single task binding lock */
init_waitqueue_head(&u->peer_wait);
init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
memset(&u->scm_stat, 0, sizeof(struct scm_stat));
return sk;
}
return 0;
}
/*
* Now to bump the refcnt of the [loadable] module that owns this
* socket at sock_release time we decrement its refcnt.
*/
try_module_get(sock->ops->owner);
/*
* Now that we're done with the ->create function, the [loadable]
* module can have its refcnt decremented
*/
module_put(pf->owner);
*res = sock;
return 0;
}
}
return sock;
}
flags = type & ~SOCK_TYPE_MASK;
return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
static int sock_map_fd(struct socket *sock, int flags)
{
struct file *newfile;
int fd = get_unused_fd_flags(flags);
newfile = sock_alloc_file(sock, flags, NULL);
struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
{
struct file *file;
if (!dname) {
/* dname = "UNIX-STREAM" */
dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
}
file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
O_RDWR | (flags & O_NONBLOCK),
&socket_file_ops); <tag9> <tag31>
sock->file = file; <tag10> <tag32>
file->private_data = sock; <tag11> <tag33>
return file;
}
if (!IS_ERR(newfile)) {
fd_install(fd, newfile);
return fd;
}
}
}
}
__do_sys_bind
SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
{
return __sys_bind(fd, umyaddr, addrlen);
int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
{
struct socket *sock;
struct sockaddr_storage address;
int err, fput_needed;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
{
struct fd f = fdget(fd);
struct socket *sock;
if (f.file) {
sock = sock_from_file(f.file);
struct socket *sock_from_file(struct file *file)
{
if (file->f_op == &socket_file_ops)
return file->private_data; /* set in sock_alloc_file */
}
if (likely(sock)) {
*fput_needed = f.flags & FDPUT_FPUT;
return sock;
}
}
}
if (sock) {
err = move_addr_to_kernel(umyaddr, addrlen, &address);
int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
{
if (copy_from_user(kaddr, uaddr, ulen)) <tag12>
return -EFAULT;
return 0;
}
/* unix_bind */
err = sock->ops->bind(sock,
(struct sockaddr *)
&address, addrlen);
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
struct sock *sk = sock->sk;
int err;
if (sunaddr->sun_path[0]) {
err = unix_bind_bsd(sk, sunaddr, addr_len);
static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
int addr_len)
{
umode_t mode = S_IFSOCK |
(SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
struct net *net = sock_net(sk);
struct net *sock_net(const struct sock *sk)
{
return read_pnet(&sk->sk_net);
static inline struct net *read_pnet(const possible_net_t *pnet)
{
return pnet->net;
}
}
struct mnt_idmap *idmap;
struct unix_address *addr;
struct dentry *dentry;
struct path parent;
int err;
addr_len = strlen(sunaddr->sun_path) +
offsetof(struct sockaddr_un, sun_path) + 1;
addr = unix_create_addr(sunaddr, addr_len);
static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
int addr_len)
{
pr_err("%s:in %d.\n",__func__,__LINE__);
struct unix_address *addr;
addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL); <tag13>
refcount_set(&addr->refcnt, 1);
addr->len = addr_len;
memcpy(addr->name, sunaddr, addr_len); <tag14>
/* name server_socket */ /* 对应 app 的socket名字. */
pr_err("%s:in %d. name %s\n",__func__,__LINE__, sunaddr->sun_path);
return addr;
}
/*
* Get the parent directory, calculate the hash for last
* component.
*/
/* 以sun_path内的字符串为名, 创建文件. */
dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
/*
* All right, let's create it.
*/
idmap = mnt_idmap(parent.mnt);
err = security_path_mknod(&parent, dentry, mode, 0);
err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
err = mutex_lock_interruptible(&u->bindlock);
new_hash = unix_bsd_hash(d_backing_inode(dentry));
unix_table_double_lock(net, old_hash, new_hash);
u->path.mnt = mntget(parent.mnt);
u->path.dentry = dget(dentry); <tag73>
__unix_set_addr_hash(net, sk, addr, new_hash);
static void __unix_set_addr_hash(struct net *net, struct sock *sk,
struct unix_address *addr, unsigned int hash)
{
__unix_remove_socket(sk);
smp_store_release(&unix_sk(sk)->addr, addr);
sk->sk_hash = hash;
__unix_insert_socket(net, sk);
static void __unix_insert_socket(struct net *net, struct sock *sk)
{
sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
static inline void sk_add_node(struct sock *sk, struct hlist_head *list)
{
sock_hold(sk);
__sk_add_node(sk, list);
}
}
}
unix_table_double_unlock(net, old_hash, new_hash);
unix_insert_bsd_socket(sk);
static void unix_insert_bsd_socket(struct sock *sk)
{
spin_lock(&bsd_socket_locks[sk->sk_hash]);
sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
void sk_add_bind_node(struct sock *sk, struct hlist_head *list)
{
hlist_add_head(&sk->sk_bind_node, list); <tag65>
}
spin_unlock(&bsd_socket_locks[sk->sk_hash]);
}
mutex_unlock(&u->bindlock);
done_path_create(&parent, dentry);
return 0;
}
}
return err;
}
fput_light(sock->file, fput_needed);
}
return err;
}
}
__do_sys_listen
SYSCALL_DEFINE2(listen, int, fd, int, backlog)
{
return __sys_listen(fd, backlog);
int __sys_listen(int fd, int backlog)
{
struct socket *sock;
int err, fput_needed;
int somaxconn;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock) {
/* unix_listen */
err = sock->ops->listen(sock, backlog);
static int unix_listen(struct socket *sock, int backlog)
{
int err;
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
unix_state_lock(sk);
sk->sk_max_ack_backlog = backlog;
sk->sk_state = TCP_LISTEN; <tag15>
/* set credentials so connect can copy them */
err = 0;
return err;
}
fput_light(sock->file, fput_needed);
}
return err;
}
}
__do_sys_accept
SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
int __user *, upeer_addrlen, int, flags)
{
return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags)
{
int ret = -EBADF;
struct fd f;
f = fdget(fd);
if (f.file) {
ret = __sys_accept4_file(f.file, upeer_sockaddr, upeer_addrlen, flags);
static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags)
{
struct file *newfile;
int newfd;
newfd = get_unused_fd_flags(flags);
newfile = do_accept(file, 0, upeer_sockaddr, upeer_addrlen, flags);
struct file *do_accept(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags)
{
struct socket *sock, *newsock;
struct file *newfile;
int err, len;
struct sockaddr_storage address;
sock = sock_from_file(file);
struct socket *sock_from_file(struct file *file)
{
if (file->f_op == &socket_file_ops)
return file->private_data; /* set in sock_alloc_file */
}
newsock = sock_alloc(); <tag16>
newsock->type = sock->type; <tag17>
newsock->ops = sock->ops;
/*
* We don't need try_module_get here, as the listening socket (sock)
* has the protocol module (sock->ops->owner) held.
*/
__module_get(newsock->ops->owner);
newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
{
struct file *file;
if (!dname) {
/* dname = "UNIX-STREAM" */
dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
}
file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
O_RDWR | (flags & O_NONBLOCK),
&socket_file_ops); <tag18>
sock->file = file; <tag19>
file->private_data = sock; <tag20>
return file;
}
/* unix_accept */
err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags, false);
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
bool kern)
{
struct sock *sk = sock->sk;
struct sock *tsk;
struct sk_buff *skb;
int err;
/* If socket state is TCP_LISTEN it cannot change (for now...),
* so that no locks are necessary.
*/
skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, &err);
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
int *err)
{
int off = 0;
return __skb_recv_datagram(sk, &sk->sk_receive_queue, flags, &off, err);
struct sk_buff *__skb_recv_datagram(struct sock *sk,
struct sk_buff_head *sk_queue,
unsigned int flags, int *off, int *err)
{
struct sk_buff *skb, *last;
long timeo;
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
static inline long sock_rcvtimeo(const struct sock *sk, bool noblock)
{
return noblock ? 0 : sk->sk_rcvtimeo;
}
do {
skb = __skb_try_recv_datagram(sk, sk_queue, flags, off, err, &last);
struct sk_buff *__skb_try_recv_datagram(struct sock *sk,
struct sk_buff_head *queue,
unsigned int flags, int *off, int *err,
struct sk_buff **last)
{
struct sk_buff *skb;
unsigned long cpu_flags;
/*
* Caller is allowed not to check sk->sk_err before skb_recv_datagram()
*/
int error;
do {
/* Again only user level code calls this function, so nothing
* interrupt level will suddenly eat the receive_queue.
*
* Look at current nfs client by the way...
* However, this function was correct in any case. 8)
*/
spin_lock_irqsave(&queue->lock, cpu_flags);
skb = __skb_try_recv_from_queue(sk, queue, flags, off, &error, last);
struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
struct sk_buff_head *queue,
unsigned int flags,
int *off, int *err,
struct sk_buff **last)
{
bool peek_at_off = false;
struct sk_buff *skb;
int _off = 0;
*last = queue->prev;
skb_queue_walk(queue, skb)
#define skb_queue_walk(queue, skb)
/* 在queue里找到一个 skb, 就返回该 skb. */
for (skb = (queue)->next;
skb != (struct sk_buff *)(queue);
skb = skb->next)
{
*off = _off;
return skb; <tag21>
}
}
spin_unlock_irqrestore(&queue->lock, cpu_flags);
if (skb)
return skb;
} while (READ_ONCE(queue->prev) != *last);
}
if (skb)
return skb;
} while (timeo && !__skb_wait_for_more_packets(sk, sk_queue, err, &timeo, last));
int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue,
int *err, long *timeo_p,
const struct sk_buff *skb)
{
int error;
DEFINE_WAIT_FUNC(wait, receiver_wake_function);
prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
if (READ_ONCE(queue->prev) != skb)
goto out;
/* handle signals */
if (signal_pending(current))
goto interrupted;
error = 0;
*timeo_p = schedule_timeout(*timeo_p);
return error;
}
}
}
tsk = skb->sk; <tag23>
wake_up_interruptible(&unix_sk(sk)->peer_wait);
/* attach accepted sock to socket */
unix_state_lock(tsk);
newsock->state = SS_CONNECTED;
sock_graft(tsk, newsock);
static inline void sock_graft(struct sock *sk, struct socket *parent)
{
WARN_ON(parent->sk);
write_lock_bh(&sk->sk_callback_lock);
rcu_assign_pointer(sk->sk_wq, &parent->wq);
parent->sk = sk; <tag25>
sk->sk_uid = SOCK_INODE(parent)->i_uid;
security_sock_graft(sk, parent);
write_unlock_bh(&sk->sk_callback_lock);
}
unix_state_unlock(tsk);
return 0;
}
/* File flags are not inherited via accept() unlike another OSes. */
return newfile;
}
fd_install(newfd, newfile);
return newfd;
}
fdput(f);
}
return ret;
}
}
__do_sys_connect
/*
 * connect() call chain for AF_UNIX SOCK_STREAM, as traced by the note's
 * author. Each callee's (trimmed) body is pasted inline directly after the
 * call site, so this reads top-to-bottom as an execution trace, not as
 * compilable C. Summary of the path:
 *   sys_connect -> __sys_connect -> __sys_connect_file
 *     -> unix_stream_connect: create `newsk` + an skb carrying it, find the
 *        listening server sock by inode, queue the skb on the server's
 *        sk_receive_queue, and wake the server (sk_data_ready).
 */
SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
int, addrlen)
{
return __sys_connect(fd, uservaddr, addrlen);
int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
{
int ret = -EBADF;
struct fd f;
f = fdget(fd);
if (f.file) {
struct sockaddr_storage address;
/* Copy the user's sockaddr_un (socket file path) into kernel space. */
ret = move_addr_to_kernel(uservaddr, addrlen, &address); <tag47>
ret = __sys_connect_file(f.file, &address, addrlen, 0);
int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
int addrlen, int file_flags)
{
pr_err("%s:in %d.\n",__func__,__LINE__);
struct socket *sock;
int err;
/* client sock */
sock = sock_from_file(file);
/* unix_stream_connect */
err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
sock->file->f_flags | file_flags);
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; <tag41>
struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
struct unix_sock *u = unix_sk(sk), *newu, *otheru;
struct net *net = sock_net(sk);
struct sk_buff *skb = NULL;
long timeo;
int err;
int st;
/* create new sock for complete connection */
newsk = unix_create1(net, NULL, 0, sock->type); <tag35>
err = -ENOMEM;
/* Allocate skb for sending to listening sock */
skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
/* --- inlined: sock_wmalloc() and the alloc_skb() chain it calls --- */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
gfp_t priority)
{
struct sk_buff *skb = alloc_skb(size, priority);
static inline struct sk_buff *alloc_skb(unsigned int size,
gfp_t priority)
{
return __alloc_skb(size, priority, 0, NUMA_NO_NODE);
struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
int flags, int node)
{
struct kmem_cache *cache;
struct sk_buff *skb;
bool pfmemalloc;
u8 *data;
cache = skbuff_cache;
//(flags & SKB_ALLOC_FCLONE) ? skbuff_fclone_cache : skbuff_cache;
sk_memalloc_socks();
static inline int sk_memalloc_socks(void)
{
return static_branch_unlikely(&memalloc_socks_key);
}
/* Get the HEAD */
skb = kmem_cache_alloc_node(cache, gfp_mask & ~GFP_DMA, node); <tag36>
prefetchw(skb);
/* Allocate the data buffer separately from the sk_buff header. */
data = kmalloc_reserve(&size, gfp_mask, node, &pfmemalloc); <tag37>
memset(skb, 0, offsetof(struct sk_buff, tail));
__build_skb_around(skb, data, size);
static void __build_skb_around(struct sk_buff *skb, void *data,
unsigned int frag_size)
{
unsigned int size = frag_size;
__finalize_skb_around(skb, data, size);
static inline void __finalize_skb_around(struct sk_buff *skb,
void *data, unsigned int size)
{
struct skb_shared_info *shinfo;
size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
/* Assumes caller memset cleared SKB */
skb->truesize = SKB_TRUESIZE(size);
refcount_set(&skb->users, 1);
skb->head = data; <tag38>
skb->data = data; <tag39>
skb_set_end_offset(skb, size);
skb->mac_header = (typeof(skb->mac_header))~0U;
skb->transport_header = (typeof(skb->transport_header))~0U;
skb->alloc_cpu = raw_smp_processor_id();
}
}
skb->pfmemalloc = pfmemalloc;
return skb;
}
}
if (skb) {
/* Charge the skb to `newsk` so accept() can recover newsk via skb->sk. */
skb_set_owner_w(skb, sk);
void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
skb_orphan(skb);
skb->sk = sk; <tag40>
skb->destructor = sock_wfree;
refcount_add(skb->truesize, &sk->sk_wmem_alloc);
}
return skb;
}
return NULL;
}
/* Find listening sock. */
other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
static struct sock *unix_find_other(struct net *net,
struct sockaddr_un *sunaddr,
int addr_len, int type)
{
struct sock *sk;
/* Non-empty sun_path means a filesystem (BSD-style) socket address. */
if (sunaddr->sun_path[0])
sk = unix_find_bsd(sunaddr, addr_len, type);
static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len, int type)
{
struct inode *inode;
struct path path;
struct sock *sk;
int err;
/* Resolve the socket file path the server bound to. */
err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
/* "server_socket" */
pr_err("%s:in %d. path %s\n",__func__,__LINE__, path.dentry->d_iname);
err = -ECONNREFUSED;
inode = d_backing_inode(path.dentry);
sk = unix_find_socket_byinode(inode); <tag43>
static struct sock *unix_find_socket_byinode(struct inode *i)
{
unsigned int hash = unix_bsd_hash(i);
struct sock *s;
spin_lock(&bsd_socket_locks[hash]);
/* Scan the hash bucket bind() put the server sock into. */
sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
struct dentry *dentry = unix_sk(s)->path.dentry;
static inline struct unix_sock *unix_sk(const struct sock *sk)
{
return (struct unix_sock *)sk;
}
/* Match by backing inode of the dentry recorded at bind() time. */
if (dentry && d_backing_inode(dentry) == i) {
sock_hold(s);
spin_unlock(&bsd_socket_locks[hash]);
return s;
}
}
}
path_put(&path);
return sk;
}
return sk;
}
/* Latch state of peer */
unix_state_lock(other);
st = sk->sk_state; <tag44>
switch (st) {
case TCP_CLOSE:
/* This is ok... continue with connect */
break;
}
unix_state_lock_nested(sk);
/* The way is open! Fastly set all the necessary fields... */
sock_hold(sk);
newsk->sk_state = TCP_ESTABLISHED; <tag71>
newsk->sk_type = sk->sk_type;
/* Set credentials */
sock->state = SS_CONNECTED; <tag46>
sk->sk_state = TCP_ESTABLISHED; <tag45>
sock_hold(newsk);
smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
/* Client's peer is the freshly created connection sock. */
unix_peer(sk) = newsk; <tag72>
#define unix_peer(sk) (unix_sk(sk)->peer)
unix_state_unlock(sk);
/* take ten and send info to listening sock */
spin_lock(&other->sk_receive_queue.lock);
/* Hand the skb (carrying newsk) to the listener; accept() will dequeue it. */
__skb_queue_tail(&other->sk_receive_queue, skb); <tag70>
static inline void __skb_queue_tail(struct sk_buff_head *list,
struct sk_buff *newsk)
{
__skb_queue_before(list, (struct sk_buff *)list, newsk);
static inline void __skb_queue_before(struct sk_buff_head *list,
struct sk_buff *next,
struct sk_buff *newsk)
{
__skb_insert(newsk, ((struct sk_buff_list *)next)->prev, next, list);
static inline void __skb_insert(struct sk_buff *newsk,
struct sk_buff *prev, struct sk_buff *next,
struct sk_buff_head *list)
{
/* See skb_queue_empty_lockless() and skb_peek_tail()
* for the opposite READ_ONCE()
*/
WRITE_ONCE(newsk->next, next);
WRITE_ONCE(newsk->prev, prev);
WRITE_ONCE(((struct sk_buff_list *)next)->prev, newsk);
WRITE_ONCE(((struct sk_buff_list *)prev)->next, newsk);
WRITE_ONCE(list->qlen, list->qlen + 1);
}
}
}
spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
/* sock_def_readable */
other->sk_data_ready(other);
void sock_def_readable(struct sock *sk)
{
struct socket_wq *wq;
trace_sk_data_ready(sk);
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
/* Wake a server blocked in accept()/poll() on this listening sock. */
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
EPOLLRDNORM | EPOLLRDBAND);
rcu_read_unlock();
}
sock_put(other);
return 0;
}
return err;
}
fdput(f);
}
return ret;
}
}
write()
sock_write_iter
/*
 * write() call chain for a connected AF_UNIX stream socket (client side),
 * traced with callee bodies pasted inline by the note's author. Path:
 *   sock_write_iter -> sock_sendmsg -> sock_sendmsg_nosec
 *     -> unix_stream_sendmsg: resolve the peer via unix_peer(sk), allocate
 *        an skb, copy the user's buffer into it, and queue it on the peer's
 *        sk_receive_queue.
 */
static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct socket *sock = file->private_data;
struct msghdr msg = {.msg_iter = *from, .msg_iocb = iocb}; <tag50>
ssize_t res;
res = sock_sendmsg(sock, &msg);
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
return sock_sendmsg_nosec(sock, msg);
static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
/* unix_stream_sendmsg */
int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
inet_sendmsg, sock, msg, msg_data_left(msg));
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
struct sock *sk = sock->sk;
struct sock *other = NULL;
int err, size;
struct sk_buff *skb;
int sent = 0;
struct scm_cookie scm;
bool fds_sent = false;
int data_len;
err = -EOPNOTSUPP;
{
err = -ENOTCONN;
/* The peer set up by unix_stream_connect(): the connection's newsk. */
other = unix_peer(sk); <tag48>
#define unix_peer(sk) (unix_sk(sk)->peer)
static inline struct unix_sock *unix_sk(const struct sock *sk)
{
return (struct unix_sock *)sk;
}
}
//while (sent < len)
{
size = len - sent;
/* Keep two messages in the pipe so it schedules better */
size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
/* allow fallback to order-0 allocations */
size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
msg->msg_flags & MSG_DONTWAIT, &err,
get_order(UNIX_SKB_FRAGS_SZ));
/* --- inlined: sock_alloc_send_pskb() allocation path --- */
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
unsigned long data_len, int noblock,
int *errcode, int max_page_order)
{
struct sk_buff *skb;
long timeo;
int err;
sk_wmem_alloc_get(sk);
static inline int sk_wmem_alloc_get(const struct sock *sk)
{
return refcount_read(&sk->sk_wmem_alloc) - 1;
}
skb = alloc_skb_with_frags(header_len, data_len, max_page_order, errcode, sk->sk_allocation);
struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
unsigned long data_len,
int max_page_order,
int *errcode,
gfp_t gfp_mask)
{
int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
unsigned long chunk;
struct sk_buff *skb;
struct page *page;
int i;
skb = alloc_skb(header_len, gfp_mask); <tag49>
skb->truesize += npages << PAGE_SHIFT;
return skb;
}
return skb;
}
fds_sent = true;
skb_put(skb, size - data_len);
skb->data_len = data_len;
skb->len = size;
/* Copy the user's buffer (msg->msg_iter) into the skb's data area. */
err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
struct iov_iter *from,
int len)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
struct sk_buff *frag_iter;
/* Copy header. */
if (copy > 0) {
copy_from_iter(skb->data + offset, copy, from); <tag52>
if ((len -= copy) == 0) {
return 0;
}
}
}
unix_state_lock(other);
/* Enqueue on the PEER's receive queue; the server reads from here. */
skb_queue_tail(&other->sk_receive_queue, skb); <tag55>
unix_state_unlock(other);
sent += size;
}
return sent;
}
return ret;
}
}
*from = msg.msg_iter;
return res;
}
read()
sock_read_iter
/*
 * read() call chain for a connected AF_UNIX stream socket (server side,
 * using the fd returned by accept()), traced with callee bodies pasted
 * inline by the note's author. Path:
 *   sock_read_iter -> sock_recvmsg -> sock_recvmsg_nosec
 *     -> unix_stream_recvmsg -> unix_stream_read_generic: peek an skb from
 *        sk->sk_receive_queue (sleeping in unix_stream_data_wait() when the
 *        queue is empty) and copy its data to the user's iov_iter via the
 *        unix_stream_read_actor callback.
 */
static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
pr_err("%s:in %d.\n",__func__,__LINE__);
struct file *file = iocb->ki_filp;
struct socket *sock = file->private_data;
struct msghdr msg = {.msg_iter = *to,
.msg_iocb = iocb}; <tag57>
ssize_t res;
/* server newsocket */
res = sock_recvmsg(sock, &msg, msg.msg_flags);
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
return sock_recvmsg_nosec(sock, msg, flags);
static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
int flags)
{
/* unix_stream_recvmsg */
int ret = INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
inet_recvmsg, sock, msg,
msg_data_left(msg), flags);
static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
size_t size, int flags)
{
/* Bundle everything the generic reader needs, including the copy actor. */
struct unix_stream_read_state state = {
.recv_actor = unix_stream_read_actor,
.socket = sock,
.msg = msg,
.size = size,
.flags = flags
};
return unix_stream_read_generic(&state, true);
static int unix_stream_read_generic(struct unix_stream_read_state *state,
bool freezable)
{
struct scm_cookie scm;
struct socket *sock = state->socket;
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
int copied = 0;
int flags = state->flags;
int noblock = flags & MSG_DONTWAIT;
bool check_creds = false;
int target;
int err = 0;
long timeo;
int skip;
size_t size = state->size;
unsigned int last_len;
memset(&scm, 0, sizeof(scm));
/* Lock the socket to prevent queue disordering
* while sleeps in memcpy_tomsg
*/
mutex_lock(&u->iolock);
skip = 0; //max(sk_peek_offset(sk, flags), 0);
{
int chunk;
bool drop_skb;
struct sk_buff *skb, *last;
redo:
unix_state_lock(sk);
/* Look at (without dequeuing) the head skb the client sender queued. */
last = skb = skb_peek(&sk->sk_receive_queue);
static inline struct sk_buff *skb_peek(const struct sk_buff_head *list_)
{
struct sk_buff *skb = list_->next;
return skb; <tag58>
}
last_len = last ? last->len : 0;
again:
/* Empty queue: drop locks and block until the sender wakes us. */
if (skb == NULL) { //
unix_state_unlock(sk);
mutex_unlock(&u->iolock);
timeo = unix_stream_data_wait(sk, timeo, last,
last_len, freezable);
mutex_lock(&u->iolock);
goto redo;
}
unix_state_unlock(sk);
/* Copy address just once */
chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
skb_get(skb);
/* unix_stream_read_actor */
chunk = state->recv_actor(skb, skip, chunk, state);
static int unix_stream_read_actor(struct sk_buff *skb,
int skip, int chunk,
struct unix_stream_read_state *state)
{
int ret;
ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, state->msg, chunk);
static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
struct msghdr *msg, int size)
{
return skb_copy_datagram_iter(from, offset, &msg->msg_iter, size);
int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
struct iov_iter *to, int len)
{
return __skb_datagram_iter(skb, offset, to, len, false, simple_copy_to_iter, NULL);
static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
struct iov_iter *to, int len, bool fault_short,
size_t (*cb)(const void *, size_t, void *,
struct iov_iter *), void *data)
{
int start = skb_headlen(skb);
int i, copy = start - offset, start_off = offset, n;
struct sk_buff *frag_iter;
/* Copy header. */
if (copy > 0) {
if (copy > len)
copy = len;
/* simple_copy_to_iter */
n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
skb->data + offset, copy, data, to);
static size_t simple_copy_to_iter(const void *addr, size_t bytes,
void *data __always_unused, struct iov_iter *i)
{
/* Final hop: skb data -> user buffer. */
return copy_to_iter(addr, bytes, i); <tag59>
}
offset += n;
if ((len -= copy) == 0) {
return 0;
}
}
}
}
}
return ret ?: chunk;
}
/* skb is only safe to use if !drop_skb */
copied += chunk;
size -= chunk;
}
mutex_unlock(&u->iolock);
out:
return copied ? : err;
}
}
return ret;
}
}
*to = msg.msg_iter;
return res;
}
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。