linux-0.98文件系统初始化

概述

微软早期的操作系统是DOS，即磁盘操作系统（Disk Operating System），可见磁盘管理在操作系统中的地位。而linux中一切皆文件，抽象出的虚拟文件系统更加强大。我一直想搞清楚“一切皆文件”的真正意义：它是如何实现的，又是如何抽象出来的。

代码分析

在main.c文件中，内核初始化完成并切换到用户态后，fork出的第一个进程调用了init函数，该函数调用setup函数初始化文件系统并挂载根目录。setup函数是用户态函数，真正的实现是内核kernel/blk_drv/genhd.c文件中的sys_setup函数，在该函数中调用了fs/super.c文件中的mount_root函数挂载根目录。


/*
 * Table of supported filesystems.
 *
 * Each entry pairs a super-block reader callback with the filesystem name.
 * mount_root() walks the table in order and stops at the first entry that
 * mounts successfully; the {NULL,NULL} entry terminates the table.
 * The initialisers are positional, so their order must match the members
 * of struct file_system_type (definition not shown in this excerpt).
 */
static struct file_system_type file_systems[] = {
    {minix_read_super,"minix"},
    {ext_read_super,"ext"},
    {msdos_read_super,"msdos"},
    {proc_read_super,"proc"},
    {NULL,NULL}
};

/*
 * Mount the root filesystem found on ROOT_DEV.
 *
 * Resets the global file table and the super block array, then tries each
 * entry of file_systems[] in order.  The first filesystem whose super block
 * can be read supplies the root inode, which becomes both the working
 * directory and the root directory of the current process.
 * Panics if no filesystem type can mount the device.
 */
void mount_root(void)
{
    int i;
    struct file_system_type * fs_type;
    struct super_block * p;
    struct inode * mi;

    if (sizeof (struct minix_inode) != 32)
        panic("bad i-node size");

    /* Reset the global file table (defined in fs/file_table.c). */
    i = 0;
    while (i < NR_FILE) {
        file_table[i].f_count=0;
        i++;
    }
    fcntl_init_locks();
    if (MAJOR(ROOT_DEV) == 2) {
        printk("Insert root floppy and press ENTER");
        wait_for_keypress();
    }
    /* Mark every super block slot as unused. */
    for (p = super_block; p < super_block + NR_SUPER; p++) {
        p->s_dev = 0;
        p->s_blocksize = 0;
        p->s_lock = 0;
        p->s_wait = NULL;
        p->s_covered = NULL;
        p->s_mounted = NULL;
    }
    /* Try each known filesystem type until one reads the super block. */
    for (fs_type = file_systems; fs_type->read_super && fs_type->name; fs_type++) {
        p = read_super(ROOT_DEV,fs_type->name,0,NULL);
        if (!p)
            continue;
        mi = p->s_mounted;
        mi->i_count += 3 ;  /* NOTE! it is logically used 4 times, not 1 */
        p->s_covered = mi;
        p->s_flags = 0;
        current->pwd = mi;
        current->root = mi;
        return;
    }
    panic("Unable to mount root");
}

/*
 * Return the super block for device dev, reading it from disk if needed.
 *
 * dev   - device number; 0 is rejected.
 * name  - filesystem type name, looked up with get_fs_type().
 * flags - stored in s_flags before the filesystem reader is called.
 * data  - passed through unchanged to the filesystem's read_super callback.
 *
 * If get_super() already knows the device, that super block is returned.
 * Otherwise the first slot of super_block[] whose s_dev is 0 is claimed and
 * handed to the filesystem-specific reader (e.g. ext_read_super for "ext").
 * Returns NULL if dev is 0, the type is unknown, no slot is free, or the
 * reader fails.
 */
static struct super_block * read_super(int dev,char *name,int flags,void *data)
{
    struct super_block * sb;
    struct file_system_type *fstype;

    if (!dev)
        return NULL;
    check_disk_change(dev);
    sb = get_super(dev);
    if (sb)
        return sb;
    fstype = get_fs_type(name);
    if (!fstype) {
        printk("get fs type failed %s\n",name);
        return NULL;
    }
    /* Look for the first unused slot (s_dev == 0). */
    sb = super_block;
    while (sb < super_block + NR_SUPER && sb->s_dev)
        sb++;
    if (sb >= super_block + NR_SUPER)
        return NULL;
    sb->s_dev = dev;
    sb->s_flags = flags;
    /* Filesystem-specific reader; on failure give the slot back. */
    if (!fstype->read_super(sb,data)) {
        sb->s_dev = 0;
        return NULL;
    }
    sb->s_dev = dev;
    sb->s_covered = NULL;
    sb->s_rd_only = 0;
    sb->s_dirt = 0;
    return sb;
}

支持minix、ext、msdos等文件系统。以ext为例，挂载时调用了fs/ext/inode.c文件中的ext_read_super函数。该函数的主要功能是读取磁盘上超级块的信息并保存到super_block结构体的u.ext_sb中，同时让s_op指向ext_sops。

include/linux/fs.h文件

/*
 * In-memory super block: one slot of the super_block[] array per mounted
 * filesystem.  Generic fields come first; the union at the end holds the
 * filesystem-specific part.
 */
struct super_block {
    unsigned short s_dev;               /* device number; 0 marks the slot as unused */
    unsigned long s_blocksize;          /* block size (ext_read_super sets 1024) */
    unsigned char s_lock;               /* cleared at init; presumably the lock_super() flag - confirm */
    unsigned char s_rd_only;            /* cleared by read_super() */
    unsigned char s_dirt;               /* cleared by read_super() */
    struct super_operations *s_op;      /* per-filesystem callbacks, e.g. &ext_sops */
    unsigned long s_flags;              /* flags passed to read_super(); copied into inode i_flags by iget() */
    unsigned long s_magic;              /* magic number copied from the on-disk super block */
    unsigned long s_time;
    struct inode * s_covered;           /* for the root fs, mount_root() sets this to the root inode */
    struct inode * s_mounted;           /* root inode of this filesystem, obtained with iget() */
    struct wait_queue * s_wait;         /* initialised to NULL; presumably waiters on s_lock - confirm */
    union {
        struct minix_sb_info minix_sb;
        struct ext_sb_info ext_sb;              // <----- declared in include/linux/ext_fs_sb.h
        struct msdos_sb_info msdos_sb;
    } u;
};

fs/ext/inode.c文件

/*
 * Super block callbacks of the ext filesystem; ext_read_super() stores a
 * pointer to this table in s->s_op.
 * The initialisers are positional, so their order must match the members of
 * struct super_operations (definition not shown in this excerpt).
 * read_inode() in fs/inode.c reaches ext_read_inode through
 * s_op->read_inode.
 */
static struct super_operations ext_sops = { 
    ext_read_inode,
    ext_write_inode,
    ext_put_inode,
    ext_put_super,
    ext_write_super,
    ext_statfs
};

/*
 * Read the ext super block of the device recorded in s->s_dev and fill in *s.
 *
 * Block 1 is read with bread(); its fields are copied into s->u.ext_sb and
 * s->s_magic, and the magic is checked against EXT_SUPER_MAGIC.  The blocks
 * holding the first free block and the first free inode are then read and
 * kept in s->u.ext_sb, s->s_op is pointed at ext_sops, and the root inode
 * (EXT_ROOT_INO) is fetched into s->s_mounted.
 *
 * s    - super block slot to fill; s->s_dev must already hold the device.
 * data - not used by this function.
 *
 * Returns s on success.  On any failure s->s_dev is reset to 0, every buffer
 * this function still holds is released, and NULL is returned.
 */
struct super_block *ext_read_super(struct super_block *s,void *data)
{
    struct buffer_head *bh;
    struct ext_super_block *es;
    int dev = s->s_dev,block;

    lock_super(s);
    if (!(bh = bread(dev, 1, BLOCK_SIZE))) {
        s->s_dev=0;
        free_super(s);
        printk("bread failed\n");
        return NULL;
    }
    /* Copy the on-disk super block into the in-memory one. */
    es = (struct ext_super_block *) bh->b_data;
    s->s_blocksize = 1024;
    s->u.ext_sb.s_ninodes = es->s_ninodes;
    s->u.ext_sb.s_nzones = es->s_nzones;
    s->u.ext_sb.s_firstdatazone = es->s_firstdatazone;
    s->u.ext_sb.s_log_zone_size = es->s_log_zone_size;
    s->u.ext_sb.s_max_size = es->s_max_size;
    s->s_magic = es->s_magic;
    s->u.ext_sb.s_firstfreeblocknumber = es->s_firstfreeblock;
    s->u.ext_sb.s_freeblockscount = es->s_freeblockscount;
    s->u.ext_sb.s_firstfreeinodenumber = es->s_firstfreeinode;
    s->u.ext_sb.s_freeinodescount = es->s_freeinodescount;
    brelse(bh);
    if (s->s_magic != EXT_SUPER_MAGIC) {
        s->s_dev = 0;
        free_super(s);
        printk("magic match failed\n");
        return NULL;
    }
    /* Keep the block that heads the free-block list, if there is one. */
    if (!s->u.ext_sb.s_firstfreeblocknumber)
        s->u.ext_sb.s_firstfreeblock = NULL;
    else
        if (!(s->u.ext_sb.s_firstfreeblock = bread(dev,
            s->u.ext_sb.s_firstfreeblocknumber, BLOCK_SIZE))) {
            printk ("ext_read_super: unable to read first free block\n");
            s->s_dev = 0;
            free_super(s);
            return NULL;
        }
    /* Keep the inode-table block that contains the first free inode. */
    if (!s->u.ext_sb.s_firstfreeinodenumber)
        s->u.ext_sb.s_firstfreeinodeblock = NULL;
    else {
        block = 2 + (s->u.ext_sb.s_firstfreeinodenumber - 1) / EXT_INODES_PER_BLOCK;
        if (!(s->u.ext_sb.s_firstfreeinodeblock = bread(dev, block, BLOCK_SIZE))) {
            printk ("ext_read_super: unable to read first free inode block\n");
            brelse(s->u.ext_sb.s_firstfreeblock);
            s->s_dev = 0;
            free_super (s);
            return NULL;
        }
    }
    free_super(s);
    /* set up enough so that it can read an inode */
    s->s_dev = dev;
    s->s_op = &ext_sops;   /* callbacks used by read_inode() via s_op */
    /* Root directory inode of the ext filesystem. */
    if (!(s->s_mounted = iget(dev,EXT_ROOT_INO))) {
        /*
         * The mount is being abandoned: drop the free-list buffers read
         * above, otherwise their references are lost once the caller
         * reuses this slot.
         */
        if (s->u.ext_sb.s_firstfreeblock) {
            brelse(s->u.ext_sb.s_firstfreeblock);
            s->u.ext_sb.s_firstfreeblock = NULL;
        }
        if (s->u.ext_sb.s_firstfreeinodeblock) {
            brelse(s->u.ext_sb.s_firstfreeinodeblock);
            s->u.ext_sb.s_firstfreeinodeblock = NULL;
        }
        s->s_dev=0;
        printk("get root inode failed\n");
        return NULL;
    }
    return s;
}

/*
 * Super block callback: fill an in-memory inode from the ext inode on disk.
 *
 * The inode table block is 2 + (i_ino-1)/EXT_INODES_PER_BLOCK on
 * inode->i_dev.  Mode, owner, link count, size and time are copied; device
 * files take i_rdev from zone 0, every other type copies the 12 zone
 * entries.  i_op is then chosen from the file type, and FIFOs also get their
 * pipe fields reset.  Panics if the inode block cannot be read.
 */
void ext_read_inode(struct inode * inode)
{
    struct buffer_head * buf;
    struct ext_inode * disk;
    int blk;
    int z;

    blk = 2 + (inode->i_ino-1)/EXT_INODES_PER_BLOCK;
    buf = bread(inode->i_dev, blk, BLOCK_SIZE);
    if (!buf)
        panic("unable to read i-node block");
    disk = (struct ext_inode *) buf->b_data;
    disk += (inode->i_ino-1)%EXT_INODES_PER_BLOCK;

    inode->i_mode = disk->i_mode;
    inode->i_uid = disk->i_uid;
    inode->i_gid = disk->i_gid;
    inode->i_nlink = disk->i_nlinks;
    inode->i_size = disk->i_size;
    inode->i_mtime = inode->i_atime = inode->i_ctime = disk->i_time;
    inode->i_blksize = 0;
    inode->i_blocks = 0;
    if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
        inode->i_rdev = disk->i_zone[0];
    } else {
        for (z = 0; z < 12; z++)
            inode->u.ext_i.i_data[z] = disk->i_zone[z];
    }
    brelse(buf);

    /* Select the inode operations that match the file type. */
    inode->i_op = NULL;
    if (S_ISREG(inode->i_mode)) {
        inode->i_op = &ext_file_inode_operations;
    } else if (S_ISDIR(inode->i_mode)) {
        inode->i_op = &ext_dir_inode_operations;   /* in fs/ext/dir.c */
    } else if (S_ISLNK(inode->i_mode)) {
        inode->i_op = &ext_symlink_inode_operations;
    } else if (S_ISCHR(inode->i_mode)) {
        inode->i_op = &ext_chrdev_inode_operations;
    } else if (S_ISBLK(inode->i_mode)) {
        inode->i_op = &ext_blkdev_inode_operations;
    } else if (S_ISFIFO(inode->i_mode)) {
        inode->i_op = &ext_fifo_inode_operations;
        inode->i_pipe = 1;
        PIPE_BASE(*inode) = NULL;
        PIPE_TAIL(*inode) = 0;
        PIPE_HEAD(*inode) = 0;
        PIPE_WRITE_WAIT(*inode) = NULL;
        PIPE_READ_WAIT(*inode) = NULL;
        PIPE_WRITERS(*inode) = 0;
        PIPE_READERS(*inode) = 0;
    }
}

然后，ext_read_super函数调用了fs/inode.c文件中的iget函数获取根目录inode。该函数先调用get_empty_inode函数从inode_table数组中获取一个空的inode，然后调用read_inode函数填充该空inode。

/*
 * Get the in-memory inode numbered nr on device dev.
 *
 * As excerpted here: an empty inode is taken with get_empty_inode(), bound
 * to the device's super block, and filled by read_inode().
 * Panics when dev is 0.  Returns NULL when no empty inode is available or
 * when get_super(dev) finds no super block for the device.
 *
 * NOTE(review): the "......" line marks code omitted from this excerpt, so
 * the description above covers only the visible path.
 */
struct inode * iget(int dev,int nr)
{
    struct inode * inode, * empty;

    if (!dev)
        panic("iget with dev==0");
    empty = get_empty_inode();
        ......

    if (!empty)
        return (NULL);
    inode = empty;
        // Look up the super block that belongs to this device
    if (!(inode->i_sb = get_super(dev))) {
        printk("iget: gouldn't get super-block\n\t");
        iput(inode);
        return NULL;
    }
    inode->i_dev = dev;
    inode->i_ino = nr;
    inode->i_flags = inode->i_sb->s_flags;
        // Let the filesystem fill in the inode
    read_inode(inode);
    return inode;
}

static void read_inode(struct inode * inode)
{
    lock_inode(inode);
        // 该s_op 指向的就是ext_sops结构体，对应的read_inode指向的是ext_read_inode函数
        // 然后调用对应文件系统的回调函数，读取inode
    if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->read_inode)
        inode->i_sb->s_op->read_inode(inode);
    unlock_inode(inode);
}

最后，返回到mount_root函数中，根目录inode被赋值给init进程结构体的pwd和root。进程调用open函数时依赖自己的pwd和root，而所有进程都是init进程的子进程，继承了init进程的pwd和root。

总结

为了支持多种文件系统，linux-0.98的文件系统部分比linux-0.11的复杂了很多。

read_super 函数封装了多个文件系统的读取超级块的调用，不同的文件系统调用不同的函数，具体支持的文件系统见file_systems[]数组。
iget 函数封装了不同文件系统获取i 节点的调用函数，利用的是super_block的回调函数指针s_op。
不同文件系统不同类型的inode有不同的处理回调函数，利用的是inode的回调函数指针i_op。

vislee / leevis.com

linux-0.98文件系统初始化 #180

概述

代码分析

总结