📄 namei.c
字号:
/*
 *  linux/fs/namei.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * Some corrections by tytso.
 */

/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
 * lookup logic.
 */
/* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
 */

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/quotaops.h>
#include <linux/pagemap.h>
#include <linux/dnotify.h>
#include <linux/smp_lock.h>
#include <linux/personality.h>

#include <asm/namei.h>
#include <asm/uaccess.h>

/*
 * ACC_MODE(x) indexes a four-byte table with the O_ACCMODE bits of the
 * open flags x and yields 0, 4, 2 or 6 (octal) respectively.
 * NOTE(review): the table values presumably are MAY_READ/MAY_WRITE style
 * permission-mask bits - confirm against <linux/fs.h>.
 *
 * Each preprocessor directive must sit on its own line: a #define extends
 * to the end of its logical line and an #include must not be followed by
 * further tokens.
 */
#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])

/* [Feb-1997 T. Schoebel-Theuer]
 * Fundamental changes in the pathname lookup mechanisms (namei)
 * were necessary because of omirr.  The reason is that omirr needs
 * to know the _real_ pathname, not the user-supplied one, in case
 * of symlinks (and also when transname replacements occur).
 *
 * The new code replaces the old recursive symlink resolution with
 * an iterative one (in case of non-nested symlink chains).  It does
 * this with calls to <fs>_follow_link().
 * As a side effect, dir_namei(), _namei() and follow_link() are now
 * replaced with a single function lookup_dentry() that can handle all
 * the special cases of the former code.
 *
 * With the new dcache, the pathname is stored at each inode, at least as
 * long as the refcount of the inode is positive.  As a side effect, the
 * size of the dcache depends on the inode cache and thus is dynamic.
 *
 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
 * resolution to correspond with current state of the code.
 *
 * Note that the symlink resolution is not *completely* iterative.
 * There is still a significant amount of tail- and mid- recursion in
 * the algorithm.  Also, note that <fs>_readlink() is not used in
 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
 * may return different results than <fs>_follow_link().  Many virtual
 * filesystems (including /proc) exhibit this behavior.
 */

/* [24-Feb-97 T.
Schoebel-Theuer] Side effects caused by new implementation:
 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
 * and the name already exists in form of a symlink, try to create the new
 * name indicated by the symlink. The old code always complained that the
 * name already exists, due to not following the symlink even if its target
 * is nonexistent.  The new semantics affects also mknod() and link() when
 * the name is a symlink pointing to a nonexistent name.
 *
 * I don't know which semantics is the right one, since I have no access
 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
 * "old" one. Personally, I think the new semantics is much more logical.
 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
 * file does succeed in both HP-UX and SunOS, but not in Solaris
 * and in the old Linux semantics.
 */

/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
 * semantics.  See the comments in "open_namei" and "do_link" below.
 *
 * [10-Sep-98 Alan Modra] Another symlink change.
 */

/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
 *	inside the path - always follow.
 *	in the last component in creation/removal/renaming - never follow.
 *	if LOOKUP_FOLLOW passed - follow.
 *	if the pathname has trailing slashes - follow.
 *	otherwise - don't follow.
 * (applied in that order).
 *
 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
 * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
 * During the 2.4 we need to fix the userland stuff depending on it -
 * hopefully we will be able to get rid of that wart in 2.5. So far only
 * XEmacs seems to be relying on it...
 */

/* In order to reduce some races, while at the same time doing additional
 * checking and hopefully speeding things up, we copy filenames to the
 * kernel data space before using them.
* * POSIX.1 2.4: an empty pathname is invalid (ENOENT). * PATH_MAX includes the nul terminator --RR. */ /*将文件名拷贝到内核数据区*/static inline int do_getname(const char *filename, char *page){ int retval; /*路径名可含有的最多的字符个数*/ unsigned long len = PATH_MAX; /*若文件名地址大于等于用户进程地址*/ if ((unsigned long) filename >= TASK_SIZE) { /*若进程的地址限制与KERNEL_DS不相等*/ if (!segment_eq(get_fs(), KERNEL_DS)) /*返回错误信息*/ return -EFAULT; } /*获取较小的地址长度*/ else if (TASK_SIZE - (unsigned long) filename < PATH_MAX) len = TASK_SIZE - (unsigned long) filename; /*将filename拷贝len长度到page,返回实际拷贝长度*/ retval = strncpy_from_user((char *)page, filename, len); if (retval > 0) { /*retval大于0小于len则返回成功信息0*/ if (retval < len) return 0; /*retval大于等于len则返回错误信息文件名太长*/ return -ENAMETOOLONG; } else if (!retval) /*filename为空则返回无该文件的信息*/ retval = -ENOENT; return retval;}/*在系统空间中分配一个页面 *并从用户空间把文件名复制到这个页面上*/char * getname(const char * filename){ char *tmp, *result; /*返回值初始化*/ result = ERR_PTR(-ENOMEM); /*从内核缓存中分配空间,若成功则调用do_getname*/ tmp = __getname(); if (tmp) { /*调用do_getname具体实现*/ int retval = do_getname(filename, tmp); result = tmp; /*do_getname出错,则释放空间,并返回错误信息*/ if (retval < 0) { putname(tmp); result = ERR_PTR(retval); } } return result;}/* *如果一个文件的inode中i_op为空或一组操作中没有permission函数 *则根据VFS标准调用vfs_permission() *判断一个文件可进行操作,读?写?查找? 
*若可更改则返回0,不可则返回-EROFS */int vfs_permission(struct inode * inode, int mask){ /*将 inode结构中的成员i_mode(表示文件类型及存取权限)赋一个临时变量mode*/ umode_t mode = inode->i_mode; /*如果参数mask标记了MAY_WRITE说明对该文件有写的操作*/ if (mask & MAY_WRITE) { /*如果该文件对应的inode所指向的内存中的超级块super_block中的s_flags和 *MS_RDONLY进行“&”操作后说明是mount *rea-only, (#define MS RDONLY 1, mount read-only) *而如果inode中 i_mode和S_IFMT进行"&"操作后等于S_IFREG,说明是普通文件; *或者i_mode和S_IFMT进行“&”后等于S_IFDIR,说明是目录; *或者i_mode和S_IFMT进行“&”后等于S_IFLNK说明是链接文件; *则permission返回-EROFS,说明是只读*/ if (IS_RDONLY(inode) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) return -EROFS; /*若inode中的i_flags和IS_IMMUTABLE进行"&"操作后不为0,说明该文件是不能写的 *则permission返回-EACCES, 对文件的写操作拒绝*/ if (IS_IMMUTABLE(inode)) return -EACCES; } /* *如果当前进程的进程控制块的fsuid和该文件的i_uid(拥有此文件的用户标识号)相等 *则mode右移6位 */ if (current->fsuid == inode->i_uid) mode >>= 6; /* *如果当前文件的i_gid(拥有此文件的用户所在组号)和当前进程控制块的fsgid相等 *或者为一组该进程用户组号之一,则mode右移3位 */ else if (in_group_p(inode->i_gid)) mode >>= 3; /*该文件中i_mode、mask相"&"且与MAY_READ、MAY_WRITE、 MAY_EXEC三者求或的结果相与 *若等于mask,则表明mask有效,返回0,说明该文件可以被覆盖*/ if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) return 0; /* * Read/write DACs are always overridable. * Executable DACs are overridable if at least one exec bit is set. 
*/ if ((mask & (MAY_READ|MAY_WRITE)) || (inode->i_mode & S_IXUGO)) /*若capable(CAP_DAC_READ_SEARCH)不为0,说明该文件既可读写,又可查找*/ if (capable(CAP_DAC_OVERRIDE)) return 0; /* * 判断目录文件是否可写 */ if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) /*若capable(CAP_DAC_READ_SEARCH)不为0,说明该文件既可读写,又可查找*/ if (capable(CAP_DAC_READ_SEARCH)) return 0; /*返回不允许的信息*/ return -EACCES;}/*检查当前进程对文件的方问权限*/int permission(struct inode * inode,int mask){ /*判断一个文件的inode中i_op是否为空 *且一组操作中是否有permission函数*/ if (inode->i_op && inode->i_op->permission) { /*定义返回值*/ int retval; /*上锁*/ lock_kernel(); /*调用i_op指向的permission函数*/ retval = inode->i_op->permission(inode, mask); /*解锁*/ unlock_kernel(); return retval; } /* *如果一个文件的inode中i_op为空或一组操作中没有permission函数 *则根据VFS标准调用vfs_permission() */ return vfs_permission(inode, mask);}/* * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. * This is used for regular files. * We cannot support write (and maybe mmap read-write shared) accesses and * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode * can have the following values: * 0: no writers, no VM_DENYWRITE mappings * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist * > 0: (i_writecount) users are writing to the file. * * Normally we operate on that counter with atomic_{inc,dec} and it's safe * except for the cases where we don't hold i_writecount yet. Then we need to * use {get,deny}_write_access() - these functions check the sign and refuse * to do the change if sign is wrong. Exclusion between them is provided by * spinlock (arbitration_lock) and I'll rip the second arsehole to the first * who will try to move it in struct inode - just leave it here. 
 */
static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED;

/*
 * Register one more writer on @inode.
 *
 * Under arbitration_lock, refuses with -ETXTBSY while i_writecount is
 * negative; otherwise increments i_writecount and returns 0.
 */
int get_write_access(struct inode * inode)
{
	/* Serialise against deny_write_access(). */
	spin_lock(&arbitration_lock);
	/* A negative count blocks writers. */
	if (atomic_read(&inode->i_writecount) < 0) {
		spin_unlock(&arbitration_lock);
		return -ETXTBSY;
	}
	atomic_inc(&inode->i_writecount);
	spin_unlock(&arbitration_lock);
	return 0;
}

/*
 * Deny write access to the inode behind @file.
 *
 * Under arbitration_lock, refuses with -ETXTBSY while i_writecount is
 * positive (the file is being written); otherwise decrements
 * i_writecount and returns 0.
 */
int deny_write_access(struct file * file)
{
	/* Serialise against get_write_access(). */
	spin_lock(&arbitration_lock);
	/* A positive count means there are active writers. */
	if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) {
		spin_unlock(&arbitration_lock);
		return -ETXTBSY;
	}
	atomic_dec(&file->f_dentry->d_inode->i_writecount);
	spin_unlock(&arbitration_lock);
	return 0;
}

/*
 * Release what @nd holds: dput() on nd->dentry, then mntput() on nd->mnt.
 */
void path_release(struct nameidata *nd)
{
	dput(nd->dentry);
	mntput(nd->mnt);
}

/*
 * Look @name up under @parent using d_lookup() only.
 *
 * If the dentry found has a d_revalidate() method and that method returns
 * 0, d_invalidate() is tried; when d_invalidate() returns 0 as well the
 * dentry is released with dput() and NULL is returned.
 * NOTE(review): a 0 return from d_invalidate() presumably means the dentry
 * was successfully invalidated - confirm against the dcache code.
 *
 * Returns the dentry, or NULL when nothing usable was found.
 */
static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
{
	struct dentry * dentry = d_lookup(parent, name);

	/* Only dentries that supply d_revalidate() need re-checking. */
	if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
		if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
			dput(dentry);
			dentry = NULL;
		}
	}
	return dentry;
}

/*
 * Look @name up under @parent, asking the directory's i_op->lookup()
 * method when d_lookup() finds nothing.
 *
 * The directory semaphore (dir->i_sem) is held around the second
 * d_lookup() and around the lookup() call.
 *
 * Returns the dentry on success or an ERR_PTR() value: -ENOMEM when
 * d_alloc() fails, -ENOENT when an already present dentry fails
 * revalidation, or whatever lookup() returned.
 */
static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
{
	struct dentry * result;
	struct inode *dir = parent->d_inode;

	down(&dir->i_sem);
	/* Check again now that i_sem is held. */
	result = d_lookup(parent, name);
	if (!result) {
		/* Not present: allocate a new dentry for the name. */
		struct dentry * dentry = d_alloc(parent, name);
		/* Returned as-is if the allocation failed. */
		result = ERR_PTR(-ENOMEM);
		if (dentry) {
			/* The lookup() method runs between lock_kernel()/unlock_kernel(). */
			lock_kernel();
			result = dir->i_op->lookup(dir, dentry);
			unlock_kernel();
			/*
			 * A non-NULL return from lookup() is passed on to the
			 * caller and our freshly allocated dentry is dropped;
			 * a NULL return means the new dentry is the result.
			 */
			if (result)
				dput(dentry);
			else
				result = dentry;
		}
		up(&dir->i_sem);
		return result;
	}

	/*
	 * The dentry was already present: release i_sem and revalidate it
	 * if it has a d_revalidate() method.
	 */
	up(&dir->i_sem);
	if (result->d_op && result->d_op->d_revalidate) {
		if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
			dput(result);
			result = ERR_PTR(-ENOENT);
		}
	}
	return result;
}

/*
 * This limits recursive symlink follows to 5 (current->link_count),
 * while limiting consecutive symlinks to 40 (current->total_link_count).
 *
 * Without that kind of total limit, nasty chains of consecutive
 * symlinks can cause almost arbitrarily long lookups.
 */
/*
 * Follow the symlink @dentry by calling its inode's follow_link() method
 * with @nd.
 *
 * Returns the result of follow_link().  When either limit above has been
 * reached, @nd is released with path_release() and -ELOOP is returned.
 */
static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	int err;
	/* Nesting limit. */
	if (current->link_count >= 5)
		goto loop;
	/* Limit on the total number of links followed. */
	if (current->total_link_count >= 40)
		goto loop;
	/* Give up the CPU first if a reschedule is pending. */
	if (current->need_resched) {
		current->state = TASK_RUNNING;
		schedule();
	}
	current->link_count++;
	current->total_link_count++;
	/* Update the access time of the link's inode. */
	UPDATE_ATIME(dentry->d_inode);
	err = dentry->d_inode->i_op->follow_link(dentry, nd);
	/* Only the nesting depth is undone; the total stays incremented. */
	current->link_count--;
	return err;
loop:
	path_release(nd);
	return -ELOOP;
}

/*
 * Step from the mount *@mnt up towards its parent mount.
 *
 * Returns 0 without touching anything when the mount is its own parent.
 * NOTE(review): SOURCE ends inside this function; only the visible part
 * is annotated and the remainder must be checked in the full file.
 */
static inline int __follow_up(struct vfsmount **mnt, struct dentry **base)
{
	struct vfsmount *parent;
	struct dentry *dentry;
	spin_lock(&dcache_lock);
	parent=(*mnt)->mnt_parent;
	/* A mount whose parent is itself has nothing above it. */
	if (parent == *mnt) {
		spin_unlock(&dcache_lock);
		return 0;
	}
	/* mntget() on the parent mount, dget() on the mountpoint dentry. */
	mntget(parent);
	dentry=dget((*mnt)->mnt_mountpoint);
	spin_unlock(&dcache_lock);
	/* Drop the old *base and replace it with the mountpoint dentry. */
	dput(*base);
	*base = dentry;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -