⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 namei.c

📁 linux的文件系统的部分代码的详细注释
💻 C
📖 第 1 页 / 共 5 页
字号:
/* *  linux/fs/namei.c * *  Copyright (C) 1991, 1992  Linus Torvalds *//* * Some corrections by tytso. *//* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname * lookup logic. *//* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture. */#include <linux/init.h>#include <linux/slab.h>#include <linux/fs.h>#include <linux/quotaops.h>#include <linux/pagemap.h>#include <linux/dnotify.h>#include <linux/smp_lock.h>#include <linux/personality.h>#include <asm/namei.h>#include <asm/uaccess.h>#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])/* [Feb-1997 T. Schoebel-Theuer] * Fundamental changes in the pathname lookup mechanisms (namei) * were necessary because of omirr.  The reason is that omirr needs * to know the _real_ pathname, not the user-supplied one, in case * of symlinks (and also when transname replacements occur). * * The new code replaces the old recursive symlink resolution with * an iterative one (in case of non-nested symlink chains).  It does * this with calls to <fs>_follow_link(). * As a side effect, dir_namei(), _namei() and follow_link() are now  * replaced with a single function lookup_dentry() that can handle all  * the special cases of the former code. * * With the new dcache, the pathname is stored at each inode, at least as * long as the refcount of the inode is positive.  As a side effect, the * size of the dcache depends on the inode cache and thus is dynamic. * * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink * resolution to correspond with current state of the code. * * Note that the symlink resolution is not *completely* iterative. * There is still a significant amount of tail- and mid- recursion in * the algorithm.  Also, note that <fs>_readlink() is not used in * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink() * may return different results than <fs>_follow_link().  Many virtual * filesystems (including /proc) exhibit this behavior. *//* [24-Feb-97 T. 
Schoebel-Theuer] Side effects caused by new implementation: * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL * and the name already exists in form of a symlink, try to create the new * name indicated by the symlink. The old code always complained that the * name already exists, due to not following the symlink even if its target * is nonexistent.  The new semantics affects also mknod() and link() when * the name is a symlink pointing to a non-existant name. * * I don't know which semantics is the right one, since I have no access * to standards. But I found by trial that HP-UX 9.0 has the full "new" * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the * "old" one. Personally, I think the new semantics is much more logical. * Note that "ln old new" where "new" is a symlink pointing to a non-existing * file does succeed in both HP-UX and SunOs, but not in Solaris * and in the old Linux semantics. *//* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink * semantics.  See the comments in "open_namei" and "do_link" below. * * [10-Sep-98 Alan Modra] Another symlink change. *//* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks: *	inside the path - always follow. *	in the last component in creation/removal/renaming - never follow. *	if LOOKUP_FOLLOW passed - follow. *	if the pathname has trailing slashes - follow. *	otherwise - don't follow. * (applied in that order). * * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT * restored for 2.4. This is the last surviving part of old 4.2BSD bug. * During the 2.4 we need to fix the userland stuff depending on it - * hopefully we will be able to get rid of that wart in 2.5. So far only * XEmacs seems to be relying on it... *//* In order to reduce some races, while at the same time doing additional * checking and hopefully speeding things up, we copy filenames to the * kernel data space before using them.. 
* * POSIX.1 2.4: an empty pathname is invalid (ENOENT). * PATH_MAX includes the nul terminator --RR. */ /*将文件名拷贝到内核数据区*/static inline int do_getname(const char *filename, char *page){	int retval;	/*路径名可含有的最多的字符个数*/	unsigned long len = PATH_MAX;	/*若文件名地址大于等于用户进程地址*/	if ((unsigned long) filename >= TASK_SIZE) {		/*若进程的地址限制与KERNEL_DS不相等*/		if (!segment_eq(get_fs(), KERNEL_DS))			/*返回错误信息*/			return -EFAULT;	} 	/*获取较小的地址长度*/	else if (TASK_SIZE - (unsigned long) filename < PATH_MAX)		len = TASK_SIZE - (unsigned long) filename;	/*将filename拷贝len长度到page,返回实际拷贝长度*/	retval = strncpy_from_user((char *)page, filename, len);	if (retval > 0) {		/*retval大于0小于len则返回成功信息0*/		if (retval < len)			return 0;		/*retval大于等于len则返回错误信息文件名太长*/		return -ENAMETOOLONG;	} else if (!retval)		/*filename为空则返回无该文件的信息*/		retval = -ENOENT;	return retval;}/*在系统空间中分配一个页面  *并从用户空间把文件名复制到这个页面上*/char * getname(const char * filename){	char *tmp, *result;	/*返回值初始化*/	result = ERR_PTR(-ENOMEM);	/*从内核缓存中分配空间,若成功则调用do_getname*/	tmp = __getname();	if (tmp)  {		/*调用do_getname具体实现*/		int retval = do_getname(filename, tmp);		result = tmp;		/*do_getname出错,则释放空间,并返回错误信息*/		if (retval < 0) {			putname(tmp);			result = ERR_PTR(retval);		}	}	return result;}/* *如果一个文件的inode中i_op为空或一组操作中没有permission函数 *则根据VFS标准调用vfs_permission() *判断一个文件可进行操作,读?写?查找? 
*若可更改则返回0,不可则返回-EROFS */int vfs_permission(struct inode * inode, int mask){	/*将 inode结构中的成员i_mode(表示文件类型及存取权限)赋一个临时变量mode*/	umode_t			mode = inode->i_mode;	 /*如果参数mask标记了MAY_WRITE说明对该文件有写的操作*/	if (mask & MAY_WRITE) {				/*如果该文件对应的inode所指向的内存中的超级块super_block中的s_flags和		  *MS_RDONLY进行“&”操作后说明是mount  *rea-only, (#define MS RDONLY 1, mount read-only)		  *而如果inode中 i_mode和S_IFMT进行"&"操作后等于S_IFREG,说明是普通文件;		  *或者i_mode和S_IFMT进行“&”后等于S_IFDIR,说明是目录;		  *或者i_mode和S_IFMT进行“&”后等于S_IFLNK说明是链接文件;		  *则permission返回-EROFS,说明是只读*/		if (IS_RDONLY(inode) &&		    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))			return -EROFS;		/*若inode中的i_flags和IS_IMMUTABLE进行"&"操作后不为0,说明该文件是不能写的		  *则permission返回-EACCES, 对文件的写操作拒绝*/		if (IS_IMMUTABLE(inode))			return -EACCES;	}	/*	  *如果当前进程的进程控制块的fsuid和该文件的i_uid(拥有此文件的用户标识号)相等	  *则mode右移6位	  */	if (current->fsuid == inode->i_uid)		mode >>= 6;	/*	  *如果当前文件的i_gid(拥有此文件的用户所在组号)和当前进程控制块的fsgid相等	  *或者为一组该进程用户组号之一,则mode右移3位	  */	else if (in_group_p(inode->i_gid))		mode >>= 3;	/*该文件中i_mode、mask相"&"且与MAY_READ、MAY_WRITE、 MAY_EXEC三者求或的结果相与	  *若等于mask,则表明mask有效,返回0,说明该文件可以被覆盖*/	if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask))		return 0;	/*	 * Read/write DACs are always overridable.	 * Executable DACs are overridable if at least one exec bit is set.	 
*/	if ((mask & (MAY_READ|MAY_WRITE)) || (inode->i_mode & S_IXUGO))		/*若capable(CAP_DAC_READ_SEARCH)不为0,说明该文件既可读写,又可查找*/		if (capable(CAP_DAC_OVERRIDE))			return 0;	/*	 * 判断目录文件是否可写	 */	if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))		/*若capable(CAP_DAC_READ_SEARCH)不为0,说明该文件既可读写,又可查找*/		if (capable(CAP_DAC_READ_SEARCH))			return 0;	/*返回不允许的信息*/	return -EACCES;}/*检查当前进程对文件的方问权限*/int permission(struct inode * inode,int mask){	/*判断一个文件的inode中i_op是否为空	  *且一组操作中是否有permission函数*/	if (inode->i_op && inode->i_op->permission) {		/*定义返回值*/		int retval;		/*上锁*/		lock_kernel();		/*调用i_op指向的permission函数*/		retval = inode->i_op->permission(inode, mask);		/*解锁*/		unlock_kernel();		return retval;	}	/* 	  *如果一个文件的inode中i_op为空或一组操作中没有permission函数         *则根据VFS标准调用vfs_permission()         */	return vfs_permission(inode, mask);}/* * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. * This is used for regular files. * We cannot support write (and maybe mmap read-write shared) accesses and * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode * can have the following values: * 0: no writers, no VM_DENYWRITE mappings * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist * > 0: (i_writecount) users are writing to the file. * * Normally we operate on that counter with atomic_{inc,dec} and it's safe * except for the cases where we don't hold i_writecount yet. Then we need to * use {get,deny}_write_access() - these functions check the sign and refuse * to do the change if sign is wrong. Exclusion between them is provided by * spinlock (arbitration_lock) and I'll rip the second arsehole to the first * who will try to move it in struct inode - just leave it here. 
 */
static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED;

/*
 * get_write_access - register one more writer on @inode.
 *
 * With arbitration_lock held: if i_writecount is negative the counter is
 * left alone and -ETXTBSY is returned; otherwise it is incremented and 0
 * is returned.
 */
int get_write_access(struct inode * inode)
{
	/* The spinlock provides exclusion against deny_write_access(). */
	spin_lock(&arbitration_lock);
	/* Negative count: refuse the write. */
	if (atomic_read(&inode->i_writecount) < 0) {
		spin_unlock(&arbitration_lock);
		return -ETXTBSY;
	}
	/* Count this writer. */
	atomic_inc(&inode->i_writecount);
	spin_unlock(&arbitration_lock);
	return 0;
}

/*
 * deny_write_access - forbid writes to the inode behind @file.
 *
 * With arbitration_lock held: if i_writecount is positive (the file is
 * being written) -ETXTBSY is returned; otherwise the counter is
 * decremented and 0 is returned.
 */
int deny_write_access(struct file * file)
{
	/* The spinlock provides exclusion against get_write_access(). */
	spin_lock(&arbitration_lock);
	/* Positive count: somebody is writing, so the denial fails. */
	if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) {
		spin_unlock(&arbitration_lock);
		return -ETXTBSY;
	}
	/* Push the counter one step towards/below zero. */
	atomic_dec(&file->f_dentry->d_inode->i_writecount);
	spin_unlock(&arbitration_lock);
	return 0;
}

/*
 * path_release - release what @nd holds: dput() on nd->dentry and
 * mntput() on nd->mnt.
 */
void path_release(struct nameidata *nd)
{
	dput(nd->dentry);
	mntput(nd->mnt);
}

/*
 * cached_lookup - look @name up under @parent with d_lookup().
 *
 * Returns the dentry found, or NULL when nothing was found.  If the dentry
 * has a d_revalidate method and that method returns 0, d_invalidate() is
 * tried; when that returns 0 as well, the dentry is dropped with dput()
 * and NULL is returned.
 */
static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
{
	/* d_lookup() only searches; it does not create anything here. */
	struct dentry * dentry = d_lookup(parent, name);
	/* Only dentries that provide d_revalidate need re-checking. */
	if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
		/* Both calls returned 0: give the dentry up. */
		if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
			dput(dentry);
			dentry = NULL;
		}
	}
	return dentry;
}

/*
 * real_lookup - look @name up under @parent, asking the filesystem when
 * d_lookup() finds nothing.
 *
 * The d_lookup() is repeated with the directory's i_sem held.  On a miss
 * a dentry is allocated with d_alloc() and passed to the directory's
 * i_op->lookup method (called between lock_kernel()/unlock_kernel()).
 * On a hit i_sem is released first and the dentry is revalidated the
 * same way cached_lookup() does it.
 *
 * Returns a dentry, or an ERR_PTR(): -ENOMEM if d_alloc() fails, -ENOENT
 * if a found dentry fails revalidation and is invalidated, or whatever
 * non-NULL value i_op->lookup returned.
 */
static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
{
	struct dentry * result;
	struct inode *dir = parent->d_inode;
	/* Take the directory semaphore for the lookup. */
	down(&dir->i_sem);
	/* Search once more now that i_sem is held. */
	result = d_lookup(parent, name);
	/* Still not there. */
	if (!result) {
		/* Allocate a new dentry for this name. */
		struct dentry * dentry = d_alloc(parent, name);
		/* Default result if the allocation failed. */
		result = ERR_PTR(-ENOMEM);
		if (dentry) {
			lock_kernel();
			/* Let the filesystem's own lookup method fill it in. */
			result = dir->i_op->lookup(dir, dentry);
			unlock_kernel();
			/*
			 * A non-NULL return is used as the result and the
			 * dentry allocated above is dropped ...
			 */
			if (result)
				dput(dentry);
			/* ... a NULL return makes the allocated dentry the result. */
			else
				result = dentry;
		}
		up(&dir->i_sem);
		return result;
	}
	/* Found by d_lookup(): release i_sem, then revalidate if needed. */
	up(&dir->i_sem);
	if (result->d_op && result->d_op->d_revalidate) {
		/* Both calls returned 0: drop the dentry and report -ENOENT. */
		if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
			dput(result);
			result = ERR_PTR(-ENOENT);
		}
	}
	return result;
}

/*
 * This limits recursive symlink follows to 5 (the check below is
 * link_count >= 5; older text said 8), while limiting consecutive
 * symlinks to 40.
 *
 * Without that kind of total limit, nasty chains of consecutive
 * symlinks can cause almost arbitrarily long lookups.
 */
/*
 * do_follow_link - follow the symbolic link @dentry on behalf of @nd.
 *
 * Returns the value of the inode's i_op->follow_link method.  When either
 * limit is exceeded, @nd is released with path_release() and -ELOOP is
 * returned.
 */
static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	int err;
	/* Nesting limit: at most 5 links being followed at once. */
	if (current->link_count >= 5)
		goto loop;
	/* Total limit: at most 40 links in all. */
	if (current->total_link_count >= 40)
		goto loop;
	/* A reschedule has been requested for this task. */
	if (current->need_resched) {
		/* Mark the task as runnable ... */
		current->state = TASK_RUNNING;
		/* ... and call the scheduler. */
		schedule();
	}
	/* One more nested link in progress. */
	current->link_count++;
	/* One more link followed in total. */
	current->total_link_count++;
	/* Update the access time of the link's inode. */
	UPDATE_ATIME(dentry->d_inode);
	/* The filesystem's follow_link method does the actual work. */
	err = dentry->d_inode->i_op->follow_link(dentry, nd);
	/* Only the nesting count is undone; the total is kept. */
	current->link_count--;
	return err;
loop:
	/* Release what nd holds. */
	path_release(nd);
	/* Too many links. */
	return -ELOOP;
}

static inline int __follow_up(struct vfsmount **mnt, struct dentry **base)
{
	struct vfsmount *parent;
	struct dentry *dentry;
	/* dcache_lock (a spinlock) is held while the mount fields are read. */
	spin_lock(&dcache_lock);
	/* The mount's parent mount. */
	parent=(*mnt)->mnt_parent;
	/* The mount is its own parent: nothing to go up to. */
	if (parent == *mnt) {
		/* Unlock and return 0. */
		spin_unlock(&dcache_lock);
		return 0;
	}
	/* mntget() on the parent mount. */
	mntget(parent);
	/* dget() on the mount's mnt_mountpoint dentry. */
	dentry=dget((*mnt)->mnt_mountpoint);
	spin_unlock(&dcache_lock);
	/* Drop the old *base and replace it with the mountpoint dentry. */
	dput(*base);
	*base = dentry;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -