Linux的虚拟文件系统VFS

2020-06-01 00:00:00 路径文件是一个文件系统结构

没想到Virtual File System VFS还是比较受欢迎的，所以今天详细写一下这一部分。

VFS是一个抽象层，对不同文件系统的实现屏蔽，对上提供统一的接口。

这张图是Linux内核中对于VFS相关数据结构的描述。

每一个进程在内核中，都对应一个task_struct结构

include/linux/sched.h中有

struct task_struct {

/* Open file information: */

struct files_struct *files;

从注释就可以看出是对所有打开的文件的一个结构。

struct files_struct {

struct file __rcu * fd_array[NR_OPEN_DEFAULT];

这里面有一个数组，保存了打开的所有文件，每个文件有一个文件描述符File Descriptor（FD），其中默认开启的有stdin, stdout, stderr，分别为0，1，2。

所以在命令行执行一个后台命令的时候常这样用：

nohup run_command.sh > run.log 2>&1 &

也即我们把stderr合并到stdout里面，全部输出到文件run.log里面。

整个系统所有打开的文件保存在同一个链表中，当一个文件被打开多次的时候，f_count记录被打开的次数，一般的文件系统不会阻止文件被多个进程同时写入，需要进程之间通过同步机制做这件事情。一旦一个文件被多个进程打开，在命令行删除文件之后，文件可能不可见了，但是不会被真正删除，已经打开的进程仍然可以读写文件，直到引用计数为0。

struct file {

union {

struct llist_node fu_llist;

struct rcu_head fu_rcuhead;

} f_u;

struct path f_path;

struct inode *f_inode; /* cached value */

const struct file_operations *f_op;

spinlock_t f_lock;

atomic_long_t f_count;

unsigned int f_flags;

fmode_t f_mode;

struct mutex f_pos_lock;

loff_t f_pos;

struct fown_struct f_owner;

其中path为

struct path {

struct vfsmount *mnt;

struct dentry *dentry;

};

这里dentry称为directory cache，顾名思义是一个缓存，是为了加快查询的，从系统启动开始，所有被引用过的文件，都会在这里缓存一下，在dentry结构里面有hash list，可以方便地通过文件或者路径名进行查找，有lru list，可以不断地淘汰。

这里vfsmount，称为mount list，每个被mount的linux文件系统，都会对应一项。对于被Mount的文件系统的根路径和mount point的路径，各对应一个dentry。

如图是dentry和vfsmount的对应关系，对于左上角的图。

对于操作系统的根路径/对应一个dentry和一个vfsmount，还有一个file结构指向dentry和vfsmount。

home路径是一个mount point，因而对应两个dentry，一个表示上面这个文件系统的mount point，一个表示下面这个文件系统的root directory。有一个vfsmount对应于home路径，parent指向/对应的vfsmount。有一个file结构指向root directory的dentry和vfsmount。

project路径也是一个mount point，也对应两个dentry，有一个vfsmount，并且parent指向home对应的vfsmount。有一个file结构指向root directory的dentry和vfsmount。

对于普通的文件或者路径data和guide，各有一个dentry对应，各有一个file指向相应的dentry，vfsmount都指向project的vfsmount。

在file这个结构中，最本质的是struct inode *f_inode，了解文件系统结构的同学知道，每个文件都有一个inode保存信息。

如图所示，文件系统会有SuperBlock，还有Inode BitMap，通过Inode的一个ID号，可以在Inode Table里面找到对应的inode。

Inode里面保存的是这个文件的数据保存在了哪些block中。

内核内存中的inode是硬盘上inode的一个缓存。

struct inode {

umode_t i_mode;

unsigned short i_opflags;

kuid_t i_uid;

kgid_t i_gid;

unsigned int i_flags;

const struct inode_operations *i_op;

struct super_block *i_sb;

struct address_space *i_mapping;

/* Stat data, not accessed from path walking */

unsigned long i_ino;

dev_t i_rdev;

loff_t i_size;

struct timespec i_atime;

struct timespec i_mtime;

struct timespec i_ctime;

spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */

unsigned short i_bytes;

unsigned int i_blkbits;

blkcnt_t i_blocks;

const struct file_operations *i_fop;

其中inode_operations是对inode可以执行的操作，file_operations是对文件可以执行的操作，对于不同的文件系统，这两个结构是不同的。

对于NFS来讲，有下面的文件系统类型

struct file_system_type nfs_fs_type = {

.owner = THIS_MODULE,

.name = "nfs",

.mount = nfs_fs_mount,

.kill_sb = nfs_kill_super,

.fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,

};

需要注册文件系统给VFS

ret = register_filesystem(&nfs_fs_type);

ret = register_nfs4_fs();

当应用层调用系统调用Mount的时候，会在内核里面调用

long do_mount(const char *dev_name, const char __user *dir_name,

const char *type_page, unsigned long flags, void *data_page)

最终会调用

struct vfsmount *

vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)

{

struct mount *mnt;

struct dentry *root;

mnt = alloc_vfsmnt(name);

root = mount_fs(type, flags, name, data);

mnt->mnt.mnt_root = root;

mnt->mnt.mnt_sb = root->d_sb;

mnt->mnt_mountpoint = mnt->mnt.mnt_root;

mnt->mnt_parent = mnt;

lock_mount_hash();

list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);

unlock_mount_hash();

return &mnt->mnt;

}

其中

struct dentry *

mount_fs(struct file_system_type *type, int flags, const char *name, void *data)

{

struct dentry *root;

struct super_block *sb;

root = type->mount(type, flags, name, data);

到这里会调用NFS这个具体的文件系统的函数

struct dentry *nfs_fs_mount(struct file_system_type *fs_type,

int flags, const char *dev_name, void *raw_data)

里面重要的两步如下：

nfs_mod = get_nfs_version(mount_info.parsed->version);

mntroot = nfs_mod->rpc_ops->try_mount(flags, dev_name, &mount_info, nfs_mod);

已经开始调用rpc层了。

struct dentry *nfs_try_mount(int flags, const char *dev_name,

struct nfs_mount_info *mount_info,

struct nfs_subversion *nfs_mod)

会调用

struct nfs_server *nfs3_create_server(struct nfs_mount_info *mount_info,

struct nfs_subversion *nfs_mod)

{

struct nfs_server *server = nfs_create_server(mount_info, nfs_mod);

/* Create a client RPC handle for the NFS v3 ACL management interface */

if (!IS_ERR(server))

nfs_init_server_aclclient(server);

return server;

}

最终会创建RPC的Client，进行相互通信。

error = nfs_init_server_rpcclient(server, &timeparms,

data->selected_flavor);

所以是符合上述过程的。

RPC层就比较复杂了。

是一个状态机，这层和本次文章无关，以后详细分解吧。

相关文章