Linux 内核文件系统与设备操作流程分析(3)

szhlwang 2011-10-10

展开全文

到这里我们可以解释 ext3_read_inode() 函数是何时调用的了，可以说是
open_namei() 函数在路径转换时间接地调用了 iget() 函数，而 iget() 函
数则是调用了已经注册好的超级块(super block)函数集 ext3_sops 中的
ext3_read_inode() 函数来获取相应的 inode。其实这也就可以解释为什么
在 struct inode->i_fop 中（也就是 ext3_file_operations 函数集中）
open 操作函数 generic_file_open() 是个空操作。因为其对应的 inode
已经在 open_namei()->iget() 中得到了，得到了一个 inode 其实在实际
文件系统中就是一个打开操作，得到了 inode 当然就可以对它进行读/写
操作了。之所以提供了一个 generic_file_open() 应该是占位用的，占位
的目的应该是为了可以使用用户提供的操作方法。也就是说，如果你自己
写了一个 open 操作并赋值给 struct inode->i_fop->open 的话，系统会
调用你所提供的这个 open 操作。我们在上面分析 __dentry_open() 函数时
已经指出了这个调用点。以上的疑问都得到了解答，但这里又再次引出了一
个疑问，那就是这个已经注册好了的超级块(super block)函数集 ext3_sops
是什么时候注册的？要解答这个疑问我们只能从头，也就是 mount 文件系统
时进行分析。

在分析 mount 前我们首先来了解下如下结构，这个结构是在注册新的文件
系统时被作为参数传递的，注册文件系统的函数为 register_filesystem()。

//
// Descriptor for one filesystem type. Per the surrounding text, an instance
// of this structure is the argument passed when registering a new filesystem.
//
struct file_system_type {

        //
        // Filesystem name, e.g. "ext3".
        //
    const char *name;
    int fs_flags;

        //
        // Superblock callback supplied by the concrete filesystem. It is
        // invoked at mount time to obtain the superblock information
        // (which, per the original note, includes inodes and so on).
        //
    int (*get_sb) (struct file_system_type *, int,
               const char *, void *, struct vfsmount *);
    void (*kill_sb) (struct super_block *);

        //
        // The module this filesystem type belongs to.
        //
    struct module *owner;

        //
        // Pointer to the next filesystem type.
        //
    struct file_system_type * next;
    struct list_head fs_supers;
    struct lock_class_key s_lock_key;
    struct lock_class_key s_umount_key;
};

我们再来看下 ext3 文件系统是如何填充这个结构的。

//
// ext3's filesystem-type descriptor. The initializers are listed in the
// same order as the fields of struct file_system_type; members that are
// not named here are left zero-initialized.
//
static struct file_system_type ext3_fs_type = {
    .name     = "ext3",
    .fs_flags = FS_REQUIRES_DEV,

        //
        // Mount-time callback: points at ext3_get_sb().
        //
    .get_sb   = ext3_get_sb,
    .kill_sb  = kill_block_super,
    .owner    = THIS_MODULE,
};

最终使用 register_filesystem( &ext3_fs_type ); 完成文件系统的注册。
这里仅是注册了文件系统，我们知道要使用一个文件系统首先要 mount 才可
使用。我们清楚了以上结构后，接着来看 vfs_kern_mount() 函数，这个函数
是内核最终实现 mount 的函数，这个函数的第一个参数即是上面提到的
file_system_type 结构，在 ext3 文件系统下传递的是 ext3_fs_type。函数
中调用的 type->get_sb 即触发了 ext3_get_sb() 函数。

//
// Create a vfsmount for a filesystem of the given type by invoking the
// type's get_sb callback.
//
// type  - filesystem type; when NULL the function returns ERR_PTR(-ENODEV).
// flags - passed through unchanged to type->get_sb().
// name  - passed to alloc_vfsmnt() and to type->get_sb().
// data  - optional; when non-NULL it is copied into a newly allocated
//         secdata buffer with security_sb_copy_data(), and it is also
//         passed to type->get_sb().
//
// Returns the new vfsmount on success, or ERR_PTR(error) on failure.
//
struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
    struct vfsmount *mnt;
    char *secdata = NULL;
    int error;

    if (!type)
        return ERR_PTR(-ENODEV);

    // Error code reported if either allocation below fails.
    error = -ENOMEM;

        //
        // Allocate a new vfsmount for the given name.
        //
    mnt = alloc_vfsmnt(name);
    if (!mnt)
        goto out;

    if (data) {
        secdata = alloc_secdata();
        if (!secdata)
            goto out_mnt;

        error = security_sb_copy_data(type, data, secdata);
        if (error)
            goto out_free_secdata;
    }

        //
        // Note: this calls the superblock callback of the registered
        // filesystem type. For ext3 that is ext3_get_sb (see the get_sb
        // member of struct file_system_type).
        //
    error = type->get_sb(type, flags, name, data, mnt);
    if (error < 0)
        goto out_free_secdata;

    error = security_sb_kern_mount(mnt->mnt_sb, secdata);
    if (error)
        goto out_sb;

        //
        // The mount point is a dentry; here it is set to the mount's own
        // root dentry.
        //
    mnt->mnt_mountpoint = mnt->mnt_root;

        //
        // Make the new vfsmount its own parent.
        // NOTE(review): the original comment says the parent link allows
        // walking all mounted filesystems; this function does not show
        // that - confirm against the callers.
        //
    mnt->mnt_parent = mnt;
    // NOTE(review): s_umount is presumably taken for writing inside
    // get_sb(); the acquisition is not visible here - confirm.
    up_write(&mnt->mnt_sb->s_umount);
    free_secdata(secdata);
    return mnt;

//
// The code below is reached only on error. The labels fall through, so each
// entry point also runs every cleanup step that follows it.
//
out_sb:
    // get_sb() succeeded: drop the root dentry, release s_umount, then
    // deactivate the superblock.
    dput(mnt->mnt_root);
    up_write(&mnt->mnt_sb->s_umount);
    deactivate_super(mnt->mnt_sb);
out_free_secdata:
    free_secdata(secdata);
out_mnt:
    free_vfsmnt(mnt);
out:
    return ERR_PTR(error);
}

下面的 ext3_get_sb() 函数仅是个简单的封装，直接调用了 get_sb_bdev()
函数，但这里要注意 get_sb_bdev() 函数不是严格按照 ext3_get_sb() 函数
进行传递的，它本身多出了一个 ext3_fill_super 参数，而这个参数是以一个
回调函数形式提供的。

//
// get_sb callback for ext3. It is a thin wrapper: every argument is
// forwarded unchanged to get_sb_bdev(), with ext3_fill_super() supplied as
// the additional callback argument.
//
// Returns whatever get_sb_bdev() returns.
//
static int ext3_get_sb(struct file_system_type *fs_type, int flags,
                       const char *dev_name, void *data,
                       struct vfsmount *mnt)
{
    int ret;

    ret = get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt);
    return ret;
}

了解了以上结构我们再来看 ext3_fill_super() 函数的具体实现，这个函数的第
一个参数即是一个超级块(super block)结构。在此函数中将上面提到的 ext3 超级
块(super block) 函数集 ext3_sops 赋给了此结构。然后调用 iget() 函数触发
超级块(super block) 函数集。

//
// Abridged excerpt of ext3's fill_super routine. Only the lines relevant to
// the discussion are shown: the declarations of `sbi` and `root`, the
// `failed_mount4` label and any return statement are not part of this
// excerpt.
//
static int ext3_fill_super (struct super_block *sb, void *data, int silent)
{

        //
        // For reasons of space, only the relevant code of this function
        // is listed here.
        //

        //
        // Install the superblock's operation tables.
        //
    sb->s_op = &ext3_sops;
    sb->s_export_op = &ext3_export_ops;
    sb->s_xattr = ext3_xattr_handlers;
#ifdef CONFIG_QUOTA
    sb->s_qcop = &ext3_qctl_operations;
    sb->dq_op = &ext3_quota_operations;
#endif
    INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */

    sb->s_root = NULL;

        //
        // Call iget() to obtain the inode numbered EXT3_ROOT_INO.
        //
    root = iget(sb, EXT3_ROOT_INO);

        //
        // Allocate the superblock's s_root from the root inode just
        // obtained; per the original note, s_root is a struct dentry.
        //
    sb->s_root = d_alloc_root(root);

        //
        // If no root dentry was produced, log an error, release the inode
        // with iput() and jump to the failure path.
        //
    if (!sb->s_root) {
        printk(KERN_ERR "EXT3-fs: get root inode failed\n");
        iput(root);
        goto failed_mount4;
    }

        //
        // If the root inode is not a directory, or its i_blocks or i_size
        // is zero, release the root dentry, log an error and jump to the
        // failure path.
        //
    if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
        dput(sb->s_root);
        sb->s_root = NULL;
        printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n");
        goto failed_mount4;
    }

}

至此所有流程都走到了，疑问也被一个个打破。我们再整体地梳理一下流程。在内核
sys_open 被调用打开一个文件或者设备驱动时，调用 filp_open()->do_filp_open()
函数，在 do_filp_open() 函数中，首先利用 open_namei() 函数得到一个 struct
nameidata 结构，那么在这个过程中 __path_lookup_intent_open() 函数设置了
struct nameidata->intent.open 相关字段，然后调用 do_path_lookup() 函数，在这
个函数中设置了 struct nameidata->mnt 与 struct nameidata->dentry 相关字段后
调用了 __link_path_walk() 函数开始分解路径，并依次调用 do_lookup() 函数来
获得路径中各目录与最终文件的 struct inode。do_lookup() 函数先从 inode 缓存
即 hlist 中查找 inode，如果没有找到则调用 real_lookup() 函数，此函数分配
了一个 struct dentry 结构，然后使用上层目录的 struct inode->i_op->lookup()
方法来继续查找，这样就触发了 ext3_lookup() 函数，而此函数得到 struct dentry
与 inode number 后调用 iget() 函数来返回 struct inode。（这里有必要强调一点，
那就是不仅目录才有 struct dentry 结构，一个文件也拥有一个 struct dentry 结
构，这个从上面具体代码分析中可以看到）而 iget() 函数是使用 struct inode 超
级块(super block)中的函数 ext3_read_inode() 来最终完成从磁盘读取 inode 操
作，读到一个 in core 类型的 struct inode 后为了提供文件与设备读/写等操作设
置了 struct inode->i_op 与 struct inode->i_fop 函数集。其实以上步骤按照提供
的系统调用以及内核操作流程来理解等于是打开了一个文件或目录。这也就是为什么
在 ext3_file_operations 函数集中只有读/写等操作，而打开是空操作的原因。至于
为什么提供一个空操作函数，在上面分析时已经给出了，这里不再阐述。到此
struct inode，struct dentry, struct nameidata 结构都已完全填充好。在
open_namei() 调用返回后将得到的 nameidata 结构作为参数调用
nameidata_to_filp() 函数，在此函数当中使用 struct dentry 作参数调用了
__dentry_open() 函数，在这个函数中会动态初始化一个 struct file 结构，并使用
struct inode->i_fop 函数集来填充 struct file->f_op （别忘了，我们前面的 inode
结构中相关域都已经准备好了，这里直接拿来使用即可）。那么不管是文件还是设备驱
动，可以看出来是走到具体文件系统这里才开始区分的。如果是目录/文件/链接则直接
使用 ext3_file_xxx 或 ext3_dir_xxx 等函数集。如果操作对象是一个设备驱动的话
则使用 init_special_inode 来初始化不同的设备驱动，如果是一个字符设备驱动的
话则调用 chrdev_open() 函数来对应 struct file 操作集。而上面提到的超级块
(super block) 函数是在注册文件系统时由 register_filesystem() 函数注册，
在 mount 时由 vfs_kern_mount() 函数间接调用 ext3_fill_super() 函数时进行关联
的。具体可以看上面的代码分析，这里不再详述。所有流程清晰后我们再说一下 struct
inode 中的几个函数集的区别与作用。我们这里仅以文件/目录为例进行解释，struct
inode_operations 操作是对文件(inode)的建立/查找(打开)/删除/重命名操作，struct
file_operations 操作是对已经存在的文件的读/写/刷新/列目录(readdir)/发送控制字
操作。

参考：linux kernel source 2.6.19.1

/usr/fs/ext3/inode.c
/usr/fs/ext3/namei.c
/usr/fs/ext3/super.c
/usr/fs/ext3/file.c
/usr/fs/ext3/dir.c
/usr/fs/block_dev.c
/usr/fs/open.c
/usr/fs/dcache.h
/usr/fs/inode.c
/usr/fs/namei.c