Open vislee opened 2 years ago
微软早期的操作系统是DOS,即磁盘操作系统,可见磁盘管理在操作系统中的地位。 而linux中一切皆文件,抽象出的虚拟文件系统更强大。而我一直想搞清楚一切皆文件的真正意义:一切皆文件是如何实现的,又是如何抽象出来的。
在main.c文件中,内核初始化完成并切换到用户态后,fork出的第一个进程先调用了init函数,该函数调用了setup函数来初始化文件系统并挂载根目录。 setup函数是用户态函数,真正的实现是内核kernel/blk_drv/genhd.c文件中的sys_setup函数,在该函数中调用了fs/super.c文件中的mount_root函数挂载根目录。
fork
init
setup
sys_setup
mount_root
// 支持的文件系统 static struct file_system_type file_systems[] = { {minix_read_super,"minix"}, {ext_read_super,"ext"}, {msdos_read_super,"msdos"}, {proc_read_super,"proc"}, {NULL,NULL} }; void mount_root(void) { int i; struct file_system_type * fs_type = file_systems; struct super_block * p; struct inode * mi; if (32 != sizeof (struct minix_inode)) panic("bad i-node size"); // 初始化全局文件表 // fs/file_table.c文件中定义 for(i=0;i<NR_FILE;i++) file_table[i].f_count=0; fcntl_init_locks(); if (MAJOR(ROOT_DEV) == 2) { printk("Insert root floppy and press ENTER"); wait_for_keypress(); } // 初始化超级块数组 for(p = &super_block[0] ; p < &super_block[NR_SUPER] ; p++) { p->s_dev = 0; p->s_blocksize = 0; p->s_lock = 0; p->s_wait = NULL; p->s_mounted = p->s_covered = NULL; } while (fs_type->read_super && fs_type->name) { p = read_super(ROOT_DEV,fs_type->name,0,NULL); if (p) { mi = p->s_mounted; mi->i_count += 3 ; /* NOTE! it is logically used 4 times, not 1 */ p->s_covered = mi; p->s_flags = 0; current->pwd = mi; current->root = mi; return; } fs_type++; } panic("Unable to mount root"); } static struct super_block * read_super(int dev,char *name,int flags,void *data) { struct super_block * s; struct file_system_type *type; if (!dev) return NULL; check_disk_change(dev); if (s = get_super(dev)) return s; if (!(type = get_fs_type(name))) { printk("get fs type failed %s\n",name); return NULL; } for (s = 0+super_block ;; s++) { if (s >= NR_SUPER+super_block) return NULL; if (!s->s_dev) break; } s->s_dev = dev; s->s_flags = flags; // ext文件系统,ext_read_super函数 if (!type->read_super(s,data)) { s->s_dev = 0; return NULL; } s->s_dev = dev; s->s_covered = NULL; s->s_rd_only = 0; s->s_dirt = 0; return s; }
支持minix ext msdos等文件系统。以ext为例,挂载调用了fs/ext/inode.c文件中ext_read_super函数。 该函数主要功能就是读取磁盘超级块的信息保存到super_block结构体的u.ext_sb中,s_op 指向ext_sops;
ext_read_super
super_block
u.ext_sb
s_op
ext_sops
include/linux/fs.h文件
/*
 * In-memory super block: one entry of the global super_block[] array per
 * mounted file system, holding generic state plus a per-file-system union.
 */
struct super_block {
	unsigned short s_dev;		/* device number; 0 marks the slot as unused */
	unsigned long s_blocksize;
	unsigned char s_lock;
	unsigned char s_rd_only;
	unsigned char s_dirt;
	struct super_operations *s_op;	/* callbacks supplied by the concrete file system */
	unsigned long s_flags;
	unsigned long s_magic;
	unsigned long s_time;
	struct inode * s_covered;
	struct inode * s_mounted;	/* root inode of the file system on this device */
	struct wait_queue * s_wait;
	/* File-system specific part; which member is valid depends on the file system type. */
	union {
		struct minix_sb_info minix_sb;
		struct ext_sb_info ext_sb;	/* declared in include/linux/ext_fs_sb.h */
		struct msdos_sb_info msdos_sb;
	} u;
};
fs/ext/inode.c文件
static struct super_operations ext_sops = { ext_read_inode, ext_write_inode, ext_put_inode, ext_put_super, ext_write_super, ext_statfs }; // 读取ext文件系统的超级块 struct super_block *ext_read_super(struct super_block *s,void *data) { struct buffer_head *bh; struct ext_super_block *es; int dev = s->s_dev,block; lock_super(s); if (!(bh = bread(dev, 1, BLOCK_SIZE))) { s->s_dev=0; free_super(s); printk("bread failed\n"); return NULL; } es = (struct ext_super_block *) bh->b_data; s->s_blocksize = 1024; s->u.ext_sb.s_ninodes = es->s_ninodes; s->u.ext_sb.s_nzones = es->s_nzones; s->u.ext_sb.s_firstdatazone = es->s_firstdatazone; s->u.ext_sb.s_log_zone_size = es->s_log_zone_size; s->u.ext_sb.s_max_size = es->s_max_size; s->s_magic = es->s_magic; s->u.ext_sb.s_firstfreeblocknumber = es->s_firstfreeblock; s->u.ext_sb.s_freeblockscount = es->s_freeblockscount; s->u.ext_sb.s_firstfreeinodenumber = es->s_firstfreeinode; s->u.ext_sb.s_freeinodescount = es->s_freeinodescount; brelse(bh); if (s->s_magic != EXT_SUPER_MAGIC) { s->s_dev = 0; free_super(s); printk("magic match failed\n"); return NULL; } if (!s->u.ext_sb.s_firstfreeblocknumber) s->u.ext_sb.s_firstfreeblock = NULL; else if (!(s->u.ext_sb.s_firstfreeblock = bread(dev, s->u.ext_sb.s_firstfreeblocknumber, BLOCK_SIZE))) { printk ("ext_read_super: unable to read first free block\n"); s->s_dev = 0; free_super(s); return NULL; } if (!s->u.ext_sb.s_firstfreeinodenumber) s->u.ext_sb.s_firstfreeinodeblock = NULL; else { block = 2 + (s->u.ext_sb.s_firstfreeinodenumber - 1) / EXT_INODES_PER_BLOCK; if (!(s->u.ext_sb.s_firstfreeinodeblock = bread(dev, block, BLOCK_SIZE))) { printk ("ext_read_super: unable to read first free inode block\n"); brelse(s->u.ext_sb.s_firstfreeblock); s->s_dev = 0; free_super (s); return NULL; } } free_super(s); /* set up enough so that it can read an inode */ s->s_dev = dev; s->s_op = &ext_sops; // 回调函数 // ext文件系统的根目录inode if (!(s->s_mounted = iget(dev,EXT_ROOT_INO))) { s->s_dev=0; printk("get root inode 
failed\n"); return NULL; } return s; } // 回调函数,读取ext文件系统inode void ext_read_inode(struct inode * inode) { struct buffer_head * bh; struct ext_inode * raw_inode; int block; block = 2 + (inode->i_ino-1)/EXT_INODES_PER_BLOCK; if (!(bh=bread(inode->i_dev, block, BLOCK_SIZE))) panic("unable to read i-node block"); raw_inode = ((struct ext_inode *) bh->b_data) + (inode->i_ino-1)%EXT_INODES_PER_BLOCK; inode->i_mode = raw_inode->i_mode; inode->i_uid = raw_inode->i_uid; inode->i_gid = raw_inode->i_gid; inode->i_nlink = raw_inode->i_nlinks; inode->i_size = raw_inode->i_size; inode->i_mtime = inode->i_atime = inode->i_ctime = raw_inode->i_time; inode->i_blocks = inode->i_blksize = 0; if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) inode->i_rdev = raw_inode->i_zone[0]; else for (block = 0; block < 12; block++) inode->u.ext_i.i_data[block] = raw_inode->i_zone[block]; brelse(bh); inode->i_op = NULL; if (S_ISREG(inode->i_mode)) inode->i_op = &ext_file_inode_operations; else if (S_ISDIR(inode->i_mode)) inode->i_op = &ext_dir_inode_operations; // fs/ext/dir.c文件中 else if (S_ISLNK(inode->i_mode)) inode->i_op = &ext_symlink_inode_operations; else if (S_ISCHR(inode->i_mode)) inode->i_op = &ext_chrdev_inode_operations; else if (S_ISBLK(inode->i_mode)) inode->i_op = &ext_blkdev_inode_operations; else if (S_ISFIFO(inode->i_mode)) { inode->i_op = &ext_fifo_inode_operations; inode->i_pipe = 1; PIPE_BASE(*inode) = NULL; PIPE_HEAD(*inode) = PIPE_TAIL(*inode) = 0; PIPE_READ_WAIT(*inode) = PIPE_WRITE_WAIT(*inode) = NULL; PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0; } }
然后,调用了fs/inode.c文件中的iget函数获取根目录inode。 该函数先调用get_empty_inode函数从inode_table数组中获取一个空的inode,然后调用read_inode函数填充该空inode。
iget
get_empty_inode
inode_table
inode
/*
 * Obtain inode number `nr` of device `dev`.
 *
 * As quoted here (part of the body is elided by the article's author, see
 * below): takes an empty slot from get_empty_inode(), binds it to the
 * device's super block, and has read_inode() fill it in.
 *
 * Returns NULL if no empty inode is available or the device has no super
 * block; panics when called with dev == 0.
 */
struct inode * iget(int dev,int nr)
{
	struct inode * inode, * empty;

	if (!dev)
		panic("iget with dev==0");
	empty = get_empty_inode();
	......	/* elided in the article; NOTE(review): the missing code is not visible here */
	if (!empty)
		return (NULL);
	inode = empty;
	/* Look up the super block that belongs to this device. */
	if (!(inode->i_sb = get_super(dev))) {
		printk("iget: gouldn't get super-block\n\t");
		iput(inode);
		return NULL;
	}
	inode->i_dev = dev;
	inode->i_ino = nr;
	inode->i_flags = inode->i_sb->s_flags;
	/* Fill the inode through the file system's own callback. */
	read_inode(inode);
	return inode;
}

/*
 * Fill `inode` by calling the read_inode callback of its super block while
 * the inode is locked. Does nothing beyond lock/unlock when the super block,
 * its s_op table, or the read_inode member is missing.
 */
static void read_inode(struct inode * inode)
{
	lock_inode(inode);
	/*
	 * For an ext file system s_op points at ext_sops, whose read_inode
	 * member is ext_read_inode; the call below therefore dispatches to the
	 * concrete file system's inode reader.
	 */
	if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->read_inode)
		inode->i_sb->s_op->read_inode(inode);
	unlock_inode(inode);
}
最后,返回到mount_root 函数中,根目录inode赋值给init进程结构体的pwd和root。 在进程中调用open函数,依赖进程的pwd和root,而所有进程又都是init进程的子进程,继承了init进程的pwd和root。
init进程
pwd
root
open
为了支持多种文件系统,linux-0.98的文件系统部分比linux-0.11的复杂了很多。
read_super
file_systems[]
i 节点
i_op
概述
微软早期的操作系统是DOS,即磁盘操作系统,可见磁盘管理在操作系统中的地位。 而linux中一切皆文件,抽象出的虚拟文件系统更强大。而我一直想搞清楚一切皆文件的真正意义:一切皆文件是如何实现的,又是如何抽象出来的。
代码分析
在main.c文件中,内核初始化完成并切换到用户态后,
fork
出第一个进程先调用了init
函数,该函数调用了setup
函数初始化文件系统并挂载根目录。setup
函数是用户态函数,真正的实现是内核kernel/blk_drv/genhd.c文件中的sys_setup
函数,在该函数中调用了fs/super.c文件中的mount_root
函数挂载根目录。支持minix ext msdos等文件系统。以ext为例,挂载调用了fs/ext/inode.c文件中
ext_read_super
函数。 该函数主要功能就是读取磁盘超级块的信息保存到super_block
结构体的u.ext_sb
中,s_op
指向ext_sops
;include/linux/fs.h文件
fs/ext/inode.c文件
然后,调用了fs/inode.c文件中的
iget
函数获取根目录inode。 该函数先调用get_empty_inode
函数从inode_table
数组中获取一个空的inode
,然后调用read_inode函数填充该空inode。最后,返回到
mount_root
函数中,根目录inode
赋值给init进程
结构体的pwd
和root
。 在进程中调用open
函数,依赖进程的pwd
和root
,而所有进程又都是init进程
的子进程,继承了init进程
的pwd
和root
。总结
为了支持多种文件系统,linux-0.98的文件系统部分比linux-0.11的复杂了很多。
read_super
函数封装了多个文件系统的读取超级块的调用,不同的文件系统调用不同的函数,具体支持的文件系统见file_systems[]
数组。iget
函数封装了不同文件系统获取i 节点
的调用函数,利用的是super_block
的回调函数指针s_op
。inode
有不同的处理回调函数,利用的是inode
的回调函数指针i_op
。