/*
 * Back end of the socket() call.
 *
 * Looks up the protocol family in proto_table[], validates the socket
 * type and protocol number, allocates a socket, lets the family's
 * create() callback initialise it, and finally binds the socket's inode
 * to a file descriptor of the calling process.
 *
 * Returns the new descriptor on success. On failure returns:
 *   -EINVAL  unknown family, unsupported type, negative protocol, or
 *            no descriptor could be obtained from get_fd();
 *   -EAGAIN  sock_alloc() returned no socket;
 *   <0       whatever the family's create() callback returned.
 */
static int
sock_socket(int family, int type, int protocol)
{
	struct proto_ops *ops;
	struct socket *sock;
	int slot, err, fd, type_ok;

	PRINTK("sys_socket: family = %d (%s), type = %d, protocol = %d\n",
	       family, family_name(family), type, protocol);

	/* Search the table of registered families for the requested one. */
	slot = 0;
	while (slot < NPROTO && proto_table[slot].family != family)
		slot++;
	if (slot >= NPROTO) {
		PRINTK("sys_socket: family not found\n");
		return -EINVAL;
	}
	ops = proto_table[slot].ops;

	/*
	 * Only the four socket types below are accepted here, and the
	 * protocol must be non-negative. The family's create() callback
	 * may still turn the protocol down afterwards.
	 */
	type_ok = (type == SOCK_STREAM ||
		   type == SOCK_DGRAM ||
		   type == SOCK_SEQPACKET ||
		   type == SOCK_RAW);
	if (!type_ok || protocol < 0)
		return -EINVAL;

	/*
	 * Get a socket (per the original note: the socket structure and
	 * its inode come from the sockets[] array, and the argument 1
	 * asks sock_alloc() to wait for one).
	 */
	sock = sock_alloc(1);
	if (!sock) {
		printk("sys_socket: no more sockets\n");
		return -EAGAIN;
	}
	sock->type = type;

	/*
	 * Attach the family's operations. For AF_UNIX this is
	 * unix_proto_ops, whose create callback is unix_proto_create.
	 * A protocol of 0 tells the family to pick its default.
	 */
	sock->ops = ops;
	err = sock->ops->create(sock, protocol);
	if (err < 0)
		goto release;

	/* Hand the socket's inode to a free descriptor slot. */
	fd = get_fd(SOCK_INODE(sock));
	if (fd < 0) {
		err = -EINVAL;
		goto release;
	}
	return fd;

release:
	sock_release(sock);
	return err;
}
/*
 * Attach an inode to a free file descriptor of the current process.
 *
 * Picks the lowest unused index in current->filp[] as the descriptor,
 * claims the first entry of file_table[] whose f_count is zero, and
 * makes that file entry point at the given inode with socket_file_ops
 * as its operation table.
 *
 * Returns the descriptor index, or -1 when either the process
 * descriptor table or the file table has no free entry.
 */
static int
get_fd(struct inode *inode)
{
	struct file *filp;
	int fd, n;

	/* Lowest-numbered descriptor slot the process is not using. */
	fd = 0;
	while (fd < NR_OPEN && current->filp[fd])
		fd++;
	if (fd >= NR_OPEN)
		return -1;

	/* The new descriptor starts with its close-on-exec bit cleared. */
	current->close_on_exec &= ~(1 << fd);

	/* First file table entry that holds no references. */
	for (n = 0; n < NR_FILE; n++) {
		if (file_table[n].f_count == 0)
			break;
	}
	if (n == NR_FILE)
		return -1;
	filp = &file_table[n];

	/* Publish the entry in the process and initialise it. */
	current->filp[fd] = filp;
	filp->f_inode = inode;
	filp->f_op = &socket_file_ops;
	filp->f_mode = 3;	/* presumably read + write; confirm against fs headers */
	filp->f_flags = 0;
	filp->f_pos = 0;
	filp->f_count = 1;
	return fd;
}
/*
 * connect() handler for AF_UNIX sockets.
 *
 * Copies the caller-supplied address from user space, checks that it is
 * an AF_UNIX address of plausible length, looks up the unix_proto_data
 * of the socket registered under that address, and then queues this
 * socket on that server via sock_awaitconn(). On success a reference is
 * taken on the peer's data and recorded in this socket's peerupd.
 *
 * Returns 0 on success, -EINVAL for a bad length, a wrong address
 * family or an unknown peer, or the negative value returned by
 * sock_awaitconn(). The flags argument is not used here.
 */
static int
unix_proto_connect(struct socket *sock, struct sockaddr *uservaddr,
		   int sockaddr_len, int flags)
{
	struct sockaddr_un sockun;
	struct unix_proto_data *serv_upd;
	int err;

	PRINTK("unix_proto_connect: socket 0x%x, servlen=%d\n", sock,
	       sockaddr_len);

	/*
	 * The address must extend past the start of the path field and
	 * be strictly smaller than a full sockaddr_un.
	 */
	if (sockaddr_len <= UN_PATH_OFFSET ||
	    sockaddr_len >= sizeof(struct sockaddr_un)) {
		PRINTK("unix_proto_connect: bad length %d\n", sockaddr_len);
		return -EINVAL;
	}

	/* Fetch the address from user space into a local copy. */
	verify_area(uservaddr, sockaddr_len);
	memcpy_fromfs(&sockun, uservaddr, sockaddr_len);

	if (sockun.sun_family != AF_UNIX) {
		PRINTK("unix_proto_connect: family is %d, not AF_UNIX (%d)\n",
		       sockun.sun_family, AF_UNIX);
		return -EINVAL;
	}

	/*
	 * Find the server side directly inside the kernel, keyed by the
	 * address (the file the unix socket listens on).
	 */
	serv_upd = unix_data_lookup(&sockun, sockaddr_len);
	if (!serv_upd) {
		PRINTK("unix_proto_connect: can't locate peer\n");
		return -EINVAL;
	}

	/* Queue on the server's socket and wait to be accepted. */
	err = sock_awaitconn(sock, serv_upd->socket);
	if (err < 0) {
		PRINTK("unix_proto_connect: can't await connection\n");
		return err;
	}

	/* Connected: pin the peer's data and remember it. */
	unix_data_ref(UN_DATA(sock->conn));
	UN_DATA(sock)->peerupd = UN_DATA(sock->conn); /* ref server */
	return 0;
}
/*
 * Queue mysock on servsock's list of pending connections (iconn), wake
 * the server, and sleep until the connection is completed.
 *
 * Returns:
 *   0        mysock->state became SS_CONNECTED;
 *   -EINVAL  servsock does not have SO_ACCEPTCON set;
 *   -EINTR   not connected after the sleep and mysock->conn is still
 *            non-NULL (mysock is unlinked from the queue first when
 *            conn still equals servsock);
 *   -EACCES  not connected after the sleep and mysock->conn is NULL.
 */
int
sock_awaitconn(struct socket *mysock, struct socket *servsock)
{
struct socket *last;
PRINTK("sock_awaitconn: trying to connect socket 0x%x to 0x%x\n",
mysock, servsock);
if (!(servsock->flags & SO_ACCEPTCON)) {
PRINTK("sock_awaitconn: server not accepting connections\n");
return -EINVAL;
}
/*
* put ourselves on the server's incomplete connection queue.
*/
mysock->next = NULL;
cli();
/*
 * the server keeps pending clients on a singly linked queue, so several
 * clients can be waiting at once; append ourselves at the tail. the
 * list update is bracketed by cli()/sti().
 */
if (!(last = servsock->iconn))
servsock->iconn = mysock;
else {
while (last->next)
last = last->next;
last->next = mysock;
}
mysock->state = SS_CONNECTING;
mysock->conn = servsock;
sti();
/*
* wake up server, then await connection. server will set state to
* SS_CONNECTED if we're connected.
*/
/* wake the server process so that it can handle the connection */
wake_up(servsock->wait);
if (mysock->state != SS_CONNECTED) {
/* block the client until the server side accepts the connection */
interruptible_sleep_on(mysock->wait);
if (mysock->state != SS_CONNECTED) {
/*
* if we're not connected we could have been
* 1) interrupted, so we need to remove ourselves
* from the server list
* 2) rejected (mysock->conn == NULL), and have
* already been removed from the list
*/
if (mysock->conn == servsock) {
cli();
/*
 * NOTE(review): the walk below has no NULL check, so it relies on
 * mysock still being linked into servsock->iconn at this point.
 */
if ((last = servsock->iconn) == mysock)
servsock->iconn = mysock->next;
else {
while (last->next != mysock)
last = last->next;
last->next = mysock->next;
}
sti();
}
return mysock->conn ? -EINTR : -EACCES;
}
}
return 0;
}
客户端调用connect发起连接后,服务端程序调用accept接收连接。
/* From net/socket.c. */
/*
 * Back end of the accept() call.
 *
 * fd must refer to an unconnected socket that has SO_ACCEPTCON set.
 * A second socket of the same type and ops is allocated, initialised
 * through the family's dup() callback, given a descriptor of its own,
 * and then passed to the family's accept() callback together with the
 * listening socket. If upeer_sockaddr is non-NULL the family's
 * getname() callback is asked to fill it in for the new socket.
 *
 * Returns the new descriptor on success. On failure returns:
 *   -EBADF   fd does not resolve to a socket;
 *   -EINVAL  the socket is not unconnected, is not accepting
 *            connections, or no descriptor could be obtained;
 *   -EAGAIN  sock_alloc() returned no socket;
 *   <0       whatever the family's dup() or accept() returned.
 */
static int
sock_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen)
{
	struct socket *sock, *newsock;
	struct file *file;
	int err, newfd;

	PRINTK("sys_accept: fd = %d\n", fd);

	sock = sockfd_lookup(fd, &file);
	if (!sock)
		return -EBADF;

	if (sock->state != SS_UNCONNECTED) {
		PRINTK("sys_accept: socket isn't unconnected\n");
		return -EINVAL;
	}
	if (!(sock->flags & SO_ACCEPTCON)) {
		PRINTK("sys_accept: socket not accepting connections!\n");
		return -EINVAL;
	}

	/* Allocate the socket that will represent the new connection. */
	newsock = sock_alloc(0);
	if (!newsock) {
		printk("sys_accept: no more sockets\n");
		return -EAGAIN;
	}
	newsock->type = sock->type;
	newsock->ops = sock->ops;

	err = sock->ops->dup(newsock, sock);
	if (err < 0) {
		sock_release(newsock);
		return err;
	}

	/* Find a free slot in the process's open file list for it. */
	newfd = get_fd(SOCK_INODE(newsock));
	if (newfd < 0) {
		sock_release(newsock);
		return -EINVAL;
	}

	/*
	 * Let the family complete the connection. For a unix socket this
	 * is unix_proto_accept, which takes one entry off the pending
	 * connection queue.
	 */
	err = newsock->ops->accept(sock, newsock, file->f_flags);
	if (err < 0) {
		/*
		 * The descriptor already exists, so it is closed rather
		 * than calling sock_release() directly (presumably the
		 * close path releases newsock - confirm).
		 */
		sys_close(newfd);
		return err;
	}

	PRINTK("sys_accept: connected socket 0x%x via 0x%x\n",
	       sock, newsock);

	/* Final argument 1 presumably selects the peer's name - confirm. */
	if (upeer_sockaddr)
		newsock->ops->getname(newsock, upeer_sockaddr,
				      upeer_addrlen, 1);
	return newfd;
}
/* From net/unix.c. */
/*
 * accept() handler for AF_UNIX sockets.
 *
 * Takes the first socket off sock's pending connection queue (iconn),
 * cross-links it with newsock, marks both SS_CONNECTED, wakes the
 * client, and records a referenced pointer to the client's data in
 * newsock's peerupd.
 *
 * While the queue is empty the caller sleeps on sock->wait, unless
 * O_NONBLOCK is set in flags.
 *
 * Returns 0 on success, -EAGAIN when the queue is empty and O_NONBLOCK
 * is set, or -ERESTARTSYS when an unblocked signal is pending after the
 * sleep.
 */
static int
unix_proto_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct socket *clientsock;

	PRINTK("unix_proto_accept: socket 0x%x accepted via socket 0x%x\n",
	       sock, newsock);

	/* Wait until some client is queued, unless told not to block. */
	for (;;) {
		clientsock = sock->iconn;
		if (clientsock)
			break;
		if (flags & O_NONBLOCK)
			return -EAGAIN;
		interruptible_sleep_on(sock->wait);
		if (current->signal & ~current->blocked) {
			PRINTK("sys_accept: sleep was interrupted\n");
			return -ERESTARTSYS;
		}
	}

	/* Unlink the head of the pending queue. */
	sock->iconn = clientsock->next;
	clientsock->next = NULL;

	/* Join the two endpoints and mark both sides connected. */
	newsock->conn = clientsock;
	newsock->state = SS_CONNECTED;
	clientsock->conn = newsock;
	clientsock->state = SS_CONNECTED;

	/* Let the client, sleeping in its connect path, continue. */
	wake_up(clientsock->wait);

	/* Pin the client's data and remember it as our peer. */
	unix_data_ref(UN_DATA(newsock->conn));
	UN_DATA(newsock)->peerupd = UN_DATA(newsock->conn);
	return 0;
}
概述
应用程序通过
socket
创建套接字句柄,通过bind
绑定地址, 通过listen
监听端口,通过connect
发起连接,通过accept
接收连接, 通过read
、recv
读取数据,通过write
、send
发送数据。我们就来看下这些函数的内核实现。
代码分析
Linux 一切皆文件,网络子系统也是建立在文件系统之上的,因此先回顾一下进程打开文件结构体。也就是说,每个socket也会对应一个file结构体,通过
fd
索引。然后我们再看网络子系统, 内核初始化时,会调用 sock_init 函数初始化网络系统。
先看下内核socket结构,定义在net/kern_sock.h文件中:
net/socket.c文件中
AF_UNIX unix socket实现。 net/unix.c 文件中
linux0.98通过
sys_socketcall
函数封装了概述中的函数实现,具体socket
则是调用了内核sock_socket
函数。通过文件句柄
fd
从当前打开文件数组current->filp[fd]
获取对应的打开文件结构file
,file.f_inode
是socket结构体
对应的inode,也就是socket.dummy
的值, 而socket.data
是对应协议的数据结构。socket.ops
是对应协议族的回调函数。创建好socket句柄后,就需要bind地址。
在net/socket.c文件中,
net/unix.c文件
bind
做的事情,通过fd找到对应的socket结构,调用对应协议族的bind
回调函数, 对于unix socket,则打开一个文件,打开文件的inode赋值到unix socket结构体的upd->inode
。服务端程序调用
listen
监听,对于unix socket协议族,listen没干啥实质的事情,仅仅是把socket结构体的状态改一下sock->flags |= SO_ACCEPTCON;
。 对于客户端程序则需要调用connect
发起连接,对应内核的net/socket.c文件中的sock_connect
函数,实际上就是对相应协议族回调函数sock->ops->connect
的封装调用。net/unix.c文件:
客户端调用connect发起连接后,服务端程序调用accept接收连接。
连接建立起来后,就可以通过
read
和write
函数读写数据了。write
对应内核函数为sys_write
,read
对应的内核函数为sys_read
总结
sock_init
函数,该函数调用了内核支持的协议族(AF_UNIX
,AF_INET
)初始化函数unix_proto_ops.init
(unix_proto_init
),inet_proto_ops.init
(ip_proto_init
) 。