📄 namei.c
字号:
/* * linux/fs/namei.c * * Copyright (C) 1991, 1992 Linus Torvalds *//* * Some corrections by tytso. *//* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname * lookup logic. *//* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture. */#include <linux/init.h>#include <linux/slab.h>#include <linux/fs.h>#include <linux/quotaops.h>#include <linux/pagemap.h>#include <linux/dnotify.h>#include <linux/smp_lock.h>#include <linux/personality.h>#include <asm/namei.h>#include <asm/uaccess.h>#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])/* [Feb-1997 T. Schoebel-Theuer] * Fundamental changes in the pathname lookup mechanisms (namei) * were necessary because of omirr. The reason is that omirr needs * to know the _real_ pathname, not the user-supplied one, in case * of symlinks (and also when transname replacements occur). * * The new code replaces the old recursive symlink resolution with * an iterative one (in case of non-nested symlink chains). It does * this with calls to <fs>_follow_link(). * As a side effect, dir_namei(), _namei() and follow_link() are now * replaced with a single function lookup_dentry() that can handle all * the special cases of the former code. * * With the new dcache, the pathname is stored at each inode, at least as * long as the refcount of the inode is positive. As a side effect, the * size of the dcache depends on the inode cache and thus is dynamic. * * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink * resolution to correspond with current state of the code. * * Note that the symlink resolution is not *completely* iterative. * There is still a significant amount of tail- and mid- recursion in * the algorithm. Also, note that <fs>_readlink() is not used in * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink() * may return different results than <fs>_follow_link(). Many virtual * filesystems (including /proc) exhibit this behavior. *//* [24-Feb-97 T. 
Schoebel-Theuer] Side effects caused by new implementation:
 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
 * and the name already exists in form of a symlink, try to create the new
 * name indicated by the symlink. The old code always complained that the
 * name already exists, due to not following the symlink even if its target
 * is nonexistent. The new semantics affects also mknod() and link() when
 * the name is a symlink pointing to a non-existent name.
 *
 * I don't know which semantics is the right one, since I have no access
 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
 * "old" one. Personally, I think the new semantics is much more logical.
 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
 * file does succeed in both HP-UX and SunOS, but not in Solaris
 * and in the old Linux semantics.
 */

/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
 * semantics. See the comments in "open_namei" and "do_link" below.
 *
 * [10-Sep-98 Alan Modra] Another symlink change.
 */

/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
 *	inside the path - always follow.
 *	in the last component in creation/removal/renaming - never follow.
 *	if LOOKUP_FOLLOW passed - follow.
 *	if the pathname has trailing slashes - follow.
 *	otherwise - don't follow.
 * (applied in that order).
 *
 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
 * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
 * During the 2.4 we need to fix the userland stuff depending on it -
 * hopefully we will be able to get rid of that wart in 2.5. So far only
 * XEmacs seems to be relying on it...
 */

/* In order to reduce some races, while at the same time doing additional
 * checking and hopefully speeding things up, we copy filenames to the
 * kernel data space before using them..
* * POSIX.1 2.4: an empty pathname is invalid (ENOENT). * PATH_MAX includes the nul terminator --RR. */static inline int do_getname(const char *filename, char *page){ int retval; unsigned long len = PATH_MAX; if ((unsigned long) filename >= TASK_SIZE) { if (!segment_eq(get_fs(), KERNEL_DS)) return -EFAULT; } else if (TASK_SIZE - (unsigned long) filename < PATH_MAX) len = TASK_SIZE - (unsigned long) filename; retval = strncpy_from_user((char *)page, filename, len); if (retval > 0) { if (retval < len) return 0; return -ENAMETOOLONG; } else if (!retval) retval = -ENOENT; return retval;}char * getname(const char * filename){ char *tmp, *result; result = ERR_PTR(-ENOMEM); tmp = __getname(); if (tmp) { int retval = do_getname(filename, tmp); result = tmp; if (retval < 0) { putname(tmp); result = ERR_PTR(retval); } } return result;}/* * vfs_permission() * * is used to check for read/write/execute permissions on a file. * We use "fsuid" for this, letting us set arbitrary permissions * for filesystem access without changing the "normal" uids which * are used for other things.. */int vfs_permission(struct inode * inode, int mask){ umode_t mode = inode->i_mode; if (mask & MAY_WRITE) { /* * Nobody gets write access to a read-only fs. */ if (IS_RDONLY(inode) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) return -EROFS; /* * Nobody gets write access to an immutable file. */ if (IS_IMMUTABLE(inode)) return -EACCES; } if (current->fsuid == inode->i_uid) mode >>= 6; else if (in_group_p(inode->i_gid)) mode >>= 3; /* * If the DACs are ok we don't need any capability check. */ if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) return 0; /* * Read/write DACs are always overridable. * Executable DACs are overridable if at least one exec bit is set. */ if ((mask & (MAY_READ|MAY_WRITE)) || (inode->i_mode & S_IXUGO)) if (capable(CAP_DAC_OVERRIDE)) return 0; /* * Searching includes executable on directories, else just read. 
*/ if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) if (capable(CAP_DAC_READ_SEARCH)) return 0; return -EACCES;}int permission(struct inode * inode,int mask){ if (inode->i_op && inode->i_op->permission) { int retval; lock_kernel(); retval = inode->i_op->permission(inode, mask); unlock_kernel(); return retval; } return vfs_permission(inode, mask);}/* * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. * This is used for regular files. * We cannot support write (and maybe mmap read-write shared) accesses and * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode * can have the following values: * 0: no writers, no VM_DENYWRITE mappings * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist * > 0: (i_writecount) users are writing to the file. * * Normally we operate on that counter with atomic_{inc,dec} and it's safe * except for the cases where we don't hold i_writecount yet. Then we need to * use {get,deny}_write_access() - these functions check the sign and refuse * to do the change if sign is wrong. Exclusion between them is provided by * spinlock (arbitration_lock) and I'll rip the second arsehole to the first * who will try to move it in struct inode - just leave it here. 
*/static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED;int get_write_access(struct inode * inode){ spin_lock(&arbitration_lock); if (atomic_read(&inode->i_writecount) < 0) { spin_unlock(&arbitration_lock); return -ETXTBSY; } atomic_inc(&inode->i_writecount); spin_unlock(&arbitration_lock); return 0;}int deny_write_access(struct file * file){ spin_lock(&arbitration_lock); if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) { spin_unlock(&arbitration_lock); return -ETXTBSY; } atomic_dec(&file->f_dentry->d_inode->i_writecount); spin_unlock(&arbitration_lock); return 0;}void path_release(struct nameidata *nd){ dput(nd->dentry); mntput(nd->mnt);}/* * Internal lookup() using the new generic dcache. * SMP-safe */static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags){ struct dentry * dentry = d_lookup(parent, name); if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { dput(dentry); dentry = NULL; } } return dentry;}/* * This is called when everything else fails, and we actually have * to go to the low-level filesystem to find out what we should do.. * * We get the directory semaphore, and after getting that we also * make sure that nobody added the entry to the dcache in the meantime.. * SMP-safe */static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags){ struct dentry * result; struct inode *dir = parent->d_inode; down(&dir->i_sem); /* * First re-do the cached lookup just in case it was created * while we waited for the directory semaphore.. * * FIXME! This could use version numbering or similar to * avoid unnecessary cache lookups. 
*/ result = d_lookup(parent, name); if (!result) { struct dentry * dentry = d_alloc(parent, name); result = ERR_PTR(-ENOMEM); if (dentry) { lock_kernel(); result = dir->i_op->lookup(dir, dentry); unlock_kernel(); if (result) dput(dentry); else result = dentry; } up(&dir->i_sem); return result; } /* * Uhhuh! Nasty case: the cache was re-populated while * we waited on the semaphore. Need to revalidate. */ up(&dir->i_sem); if (result->d_op && result->d_op->d_revalidate) { if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) { dput(result); result = ERR_PTR(-ENOENT); } } return result;}/* * This limits recursive symlink follows to 8, while * limiting consecutive symlinks to 40. * * Without that kind of total limit, nasty chains of consecutive * symlinks can cause almost arbitrarily long lookups. */static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd){ int err; if (current->link_count >= 5) goto loop; if (current->total_link_count >= 40) goto loop; if (current->need_resched) { current->state = TASK_RUNNING; schedule(); } current->link_count++; current->total_link_count++; UPDATE_ATIME(dentry->d_inode); err = dentry->d_inode->i_op->follow_link(dentry, nd); current->link_count--; return err;loop: path_release(nd); return -ELOOP;}static inline int __follow_up(struct vfsmount **mnt, struct dentry **base){ struct vfsmount *parent; struct dentry *dentry; spin_lock(&dcache_lock); parent=(*mnt)->mnt_parent; if (parent == *mnt) { spin_unlock(&dcache_lock); return 0; } mntget(parent); dentry=dget((*mnt)->mnt_mountpoint); spin_unlock(&dcache_lock); dput(*base); *base = dentry; mntput(*mnt); *mnt = parent; return 1;}int follow_up(struct vfsmount **mnt, struct dentry **dentry){ return __follow_up(mnt, dentry);}static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry){ struct vfsmount *mounted; spin_lock(&dcache_lock); mounted = lookup_mnt(*mnt, *dentry); if (mounted) { *mnt = mntget(mounted); spin_unlock(&dcache_lock); 
dput(*dentry); mntput(mounted->mnt_parent); *dentry = dget(mounted->mnt_root); return 1; } spin_unlock(&dcache_lock); return 0;}int follow_down(struct vfsmount **mnt, struct dentry **dentry){ return __follow_down(mnt,dentry);} static inline void follow_dotdot(struct nameidata *nd){ while(1) { struct vfsmount *parent; struct dentry *dentry; read_lock(¤t->fs->lock); if (nd->dentry == current->fs->root && nd->mnt == current->fs->rootmnt) { read_unlock(¤t->fs->lock); break; } read_unlock(¤t->fs->lock); spin_lock(&dcache_lock); if (nd->dentry != nd->mnt->mnt_root) { dentry = dget(nd->dentry->d_parent); spin_unlock(&dcache_lock); dput(nd->dentry); nd->dentry = dentry; break; } parent=nd->mnt->mnt_parent; if (parent == nd->mnt) { spin_unlock(&dcache_lock); break; } mntget(parent); dentry=dget(nd->mnt->mnt_mountpoint); spin_unlock(&dcache_lock); dput(nd->dentry); nd->dentry = dentry; mntput(nd->mnt); nd->mnt = parent; }}/* * Name resolution. * * This is the basic name resolution function, turning a pathname * into the final dentry. * * We expect 'base' to be positive and a directory. */int link_path_walk(const char * name, struct nameidata *nd){ struct dentry *dentry; struct inode *inode; int err; unsigned int lookup_flags = nd->flags; while (*name=='/') name++; if (!*name) goto return_base; inode = nd->dentry->d_inode; if (current->link_count) lookup_flags = LOOKUP_FOLLOW; /* At this point we know we have a real path component. */ for(;;) { unsigned long hash; struct qstr this; unsigned int c; err = permission(inode, MAY_EXEC); dentry = ERR_PTR(err); if (err) break; this.name = name; c = *(const unsigned char *)name; hash = init_name_hash(); do { name++; hash = partial_name_hash(c, hash); c = *(const unsigned char *)name; } while (c && (c != '/')); this.len = name - (const char *) this.name; this.hash = end_name_hash(hash); /* remove trailing slashes? */ if (!c) goto last_component; while (*++name == '/'); if (!*name) goto last_with_slashes; /* * "." and ".." 
are special - ".." especially so because it has * to be able to know about the current root directory and * parent relationships. */ if (this.name[0] == '.') switch (this.len) { default: break; case 2: if (this.name[1] != '.') break; follow_dotdot(nd); inode = nd->dentry->d_inode; /* fallthrough */ case 1: continue;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -