📄 file.c
字号:
/* NOTE(review): this region begins INSIDE ll_extent_lock() -- the function
 * header and the declarations of oinfo/einfo/lvb/policy are above the
 * visible chunk.  Only the enqueue-and-update tail is shown. */
        oinfo.oi_md = lsm;
        oinfo.oi_flags = ast_flags;

        /* Ask the OSC layer to enqueue the extent DLM lock; the granted
         * policy (actual extent) is returned through oinfo. */
        rc = obd_enqueue(sbi->ll_osc_exp, &oinfo, &einfo, NULL);
        *policy = oinfo.oi_policy;
        if (rc > 0)
                rc = -EIO;

        ll_inode_size_lock(inode, 1);
        inode_init_lvb(inode, &lvb);
        obd_merge_lvb(sbi->ll_osc_exp, lsm, &lvb, 1);

        /* Only a whole-file lock ([0, EOF]) makes the merged size
         * authoritative enough to write back into i_size. */
        if (policy->l_extent.start == 0 &&
            policy->l_extent.end == OBD_OBJECT_EOF) {
                /* vmtruncate()->ll_truncate() first sets the i_size and then
                 * the kms under both a DLM lock and the
                 * ll_inode_size_lock().  If we don't get the
                 * ll_inode_size_lock() here we can match the DLM lock and
                 * reset i_size from the kms before the truncating path has
                 * updated the kms.  generic_file_write can then trust the
                 * stale i_size when doing appending writes and effectively
                 * cancel the result of the truncate.  Getting the
                 * ll_inode_size_lock() after the enqueue maintains the DLM
                 * -> ll_inode_size_lock() acquiring order. */
                i_size_write(inode, lvb.lvb_size);
                CDEBUG(D_INODE, "inode=%lu, updating i_size %llu\n",
                       inode->i_ino, i_size_read(inode));
        }

        /* On successful enqueue, refresh the inode timestamps from the
         * server-merged lock value block. */
        if (rc == 0) {
                LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
                LTIME_S(inode->i_atime) = lvb.lvb_atime;
                LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
        }
        ll_inode_size_unlock(inode, 1);

        RETURN(rc);
}

/*
 * Release an extent DLM lock previously taken on @lsm.
 *
 * Returns 0 when locking is being bypassed for this file/mount, otherwise
 * the result of obd_cancel().
 */
int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
                     struct lov_stripe_md *lsm, int mode,
                     struct lustre_handle *lockh)
{
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        int rc;
        ENTRY;

        /* XXX phil: can we do this? won't it screw the file size up? */
        /* No lock was taken in the first place when the fd or the whole
         * mount ignores locking, so there is nothing to cancel. */
        if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
            (sbi->ll_flags & LL_SBI_NOLCK))
                RETURN(0);

        rc = obd_cancel(sbi->ll_osc_exp, lsm, mode, lockh);

        RETURN(rc);
}

/*
 * Mark the file as contended: record the current time and set the
 * LLI_F_CONTENDED bit.  ll_is_file_contended() later uses the timestamp
 * to expire the contended state.
 */
static void ll_set_file_contended(struct inode *inode)
{
        struct ll_inode_info *lli = ll_i2info(inode);

        lli->lli_contention_time = cfs_time_current();
        set_bit(LLI_F_CONTENDED, &lli->lli_flags);
}

/* Clear the contended state set by ll_set_file_contended(). */
void ll_clear_file_contended(struct inode *inode)
{
        struct ll_inode_info *lli = ll_i2info(inode);

        clear_bit(LLI_F_CONTENDED, &lli->lli_flags);
}

/*
 * Decide whether I/O on @file should take the contended (lockless) path.
 *
 * Returns 1 when the fd ignores locking, or while the inode's contended
 * state set by ll_set_file_contended() has not yet aged past
 * sbi->ll_contention_time seconds.  Returns 0 otherwise, and always 0
 * when the server did not advertise OBD_CONNECT_SRVLOCK.
 */
static int ll_is_file_contended(struct file *file)
{
        struct inode *inode = file->f_dentry->d_inode;
        struct ll_inode_info *lli = ll_i2info(inode);
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
        ENTRY;

        if (!(sbi->ll_lco.lco_flags & OBD_CONNECT_SRVLOCK)) {
                CDEBUG(D_INFO, "the server does not support SRVLOCK feature,"
                       " osc connect flags = 0x"LPX64"\n",
                       sbi->ll_lco.lco_flags);
                RETURN(0);
        }
        if (fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK))
                RETURN(1);
        if (test_bit(LLI_F_CONTENDED, &lli->lli_flags)) {
                cfs_time_t cur_time = cfs_time_current();
                cfs_time_t retry_time;

                /* The contended state expires ll_contention_time seconds
                 * after it was set; retry DLM locking after that. */
                retry_time = cfs_time_add(
                        lli->lli_contention_time,
                        cfs_time_seconds(sbi->ll_contention_time));
                if (cfs_time_after(cur_time, retry_time)) {
                        ll_clear_file_contended(inode);
                        RETURN(0);
                }
                RETURN(1);
        }
        RETURN(0);
}

/*
 * Take the lock-tree extent lock for an iovec-based read/write.
 *
 * Returns 1 when the tree lock was acquired, 0 when the file is contended
 * (caller falls back to the lockless path; -EUSERS from ll_tree_lock_iov()
 * also marks the file contended and returns 0), or a negative errno.
 * O_APPEND writes always take the lock.
 */
static int ll_file_get_tree_lock_iov(struct ll_lock_tree *tree,
                                     struct file *file,
                                     const struct iovec *iov,
                                     unsigned long nr_segs,
                                     loff_t start, loff_t end, int rw)
{
        int append;
        int tree_locked = 0;
        int rc;
        struct inode * inode = file->f_dentry->d_inode;

        append = (rw == WRITE) && (file->f_flags & O_APPEND);

        if (append || !ll_is_file_contended(file)) {
                struct ll_lock_tree_node *node;
                int ast_flags;

                /* Appending writes must not be denied on contention --
                 * they rely on the lock for a correct file size. */
                ast_flags = append ? 0 : LDLM_FL_DENY_ON_CONTENTION;
                if (file->f_flags & O_NONBLOCK)
                        ast_flags |= LDLM_FL_BLOCK_NOWAIT;
                node = ll_node_from_inode(inode, start, end,
                                          (rw == WRITE) ? LCK_PW : LCK_PR);
                if (IS_ERR(node)) {
                        rc = PTR_ERR(node);
                        GOTO(out, rc);
                }
                tree->lt_fd = LUSTRE_FPRIVATE(file);
                rc = ll_tree_lock_iov(tree, node, iov, nr_segs, ast_flags);
                if (rc == 0)
                        tree_locked = 1;
                else if (rc == -EUSERS)
                        ll_set_file_contended(inode);
                else
                        GOTO(out, rc);
        }
        RETURN(tree_locked);
out:
        return rc;
}

/* XXX: exact copy from kernel code (__generic_file_aio_write_nolock from
 * rhel4) */
/*
 * Validate an iovec array and return the total byte count.  Trims
 * *nr_segs at the first segment that fails access_ok(), so the caller
 * processes only the accessible prefix.
 *
 * NOTE(review): errors are returned as -EINVAL/-EFAULT through a size_t
 * return type, so callers see them as huge positive counts -- matches the
 * kernel code this was copied from, but callers must treat the value as
 * ssize_t to detect them.
 */
static size_t ll_file_get_iov_count(const struct iovec *iov,
                                    unsigned long *nr_segs)
{
        size_t count = 0;
        unsigned long seg;

        for (seg = 0; seg < *nr_segs; seg++) {
                const struct iovec *iv = &iov[seg];

                /*
                 * If any segment has a negative length, or the cumulative
                 * length ever wraps negative then return -EINVAL.
                 */
                count += iv->iov_len;
                if (unlikely((ssize_t)(count|iv->iov_len) < 0))
                        return -EINVAL;
                if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
                        continue;
                if (seg == 0)
                        return -EFAULT;
                *nr_segs = seg;
                count -= iv->iov_len;   /* This segment is no good */
                break;
        }
        return count;
}

/*
 * Build in @iov_copy a trimmed view of the first @size bytes of *iov_out
 * (starting @*offset bytes into its first segment), then advance
 * *iov_out / *nr_segs past the fully-consumed segments.  *nrsegs_copy is
 * set to the number of segments placed in @iov_copy.  Used to split a
 * large request into ll_max_rw_chunk-sized pieces.
 *
 * NOTE(review): loop index @i is a signed int compared against the
 * unsigned long *nr_segs -- harmless for realistic segment counts but
 * worth confirming.
 */
static int iov_copy_update(unsigned long *nr_segs, const struct iovec **iov_out,
                           unsigned long *nrsegs_copy, struct iovec *iov_copy,
                           size_t *offset, size_t size)
{
        int i;
        const struct iovec *iov = *iov_out;

        for (i = 0; i < *nr_segs; i++) {
                const struct iovec *iv = &iov[i];
                struct iovec *ivc = &iov_copy[i];

                *ivc = *iv;
                if (i == 0) {
                        /* First segment starts mid-way through: skip the
                         * bytes consumed by previous chunks. */
                        ivc->iov_len -= *offset;
                        ivc->iov_base += *offset;
                }
                if (ivc->iov_len >= size) {
                        /* This segment completes the chunk; remember how
                         * far into it the next chunk must resume. */
                        ivc->iov_len = size;
                        if (i == 0)
                                *offset += size;
                        else
                                *offset = size;
                        break;
                }
                size -= ivc->iov_len;
        }
        *iov_out += i;
        *nr_segs -= i;
        *nrsegs_copy = i + 1;

        return 0;
}

/*
 * Vectored read entry point (readv on older kernels, aio_read otherwise).
 *
 * Handles the object-less file case by returning zero-filled buffers up
 * to i_size, otherwise takes the extent lock (chunked by ll_max_rw_chunk
 * when configured) and reads under it.
 */
#ifdef HAVE_FILE_READV
static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
                             unsigned long nr_segs, loff_t *ppos)
{
#else
static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
                                unsigned long nr_segs, loff_t pos)
{
        struct file *file = iocb->ki_filp;
        loff_t *ppos = &iocb->ki_pos;
#endif
        struct inode *inode = file->f_dentry->d_inode;
        struct ll_inode_info *lli = ll_i2info(inode);
        struct lov_stripe_md *lsm = lli->lli_smd;
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct ll_lock_tree tree;
        struct ost_lvb lvb;
        struct ll_ra_read bead;
        int ra = 0;
        loff_t end;
        ssize_t retval, chunk, sum = 0;
        int tree_locked;
        struct iovec *iov_copy = NULL;
        unsigned long nrsegs_copy, nrsegs_orig = 0;
        size_t count, iov_offset = 0;
        __u64 kms;
        ENTRY;

        count = ll_file_get_iov_count(iov, &nr_segs);
        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
               inode->i_ino, inode->i_generation, inode, count, *ppos);
        /* "If nbyte is 0, read() will return 0 and have no other results."
         * -- Single Unix Spec */
        if (count == 0)
                RETURN(0);

        ll_stats_ops_tally(sbi, LPROC_LL_READ_BYTES, count);

        if (!lsm) {
                /* Read on file with no objects should return zero-filled
                 * buffers up to file size (we can get non-zero sizes with
                 * mknod + truncate, then opening file for read. This is a
                 * common pattern in NFS case, it seems). Bug 6243 */
                int notzeroed;
                /* Since there are no objects on OSTs, we have nothing to get
                 * lock on and so we are forced to access inode->i_size
                 * unguarded */

                /* Read beyond end of file */
                if (*ppos >= i_size_read(inode))
                        RETURN(0);

                if (count > i_size_read(inode) - *ppos)
                        count = i_size_read(inode) - *ppos;
                /* Make sure to correctly adjust the file pos pointer for
                 * EFAULT case */
                for (nrsegs_copy = 0; nrsegs_copy < nr_segs; nrsegs_copy++) {
                        const struct iovec *iv = &iov[nrsegs_copy];

                        if (count < iv->iov_len)
                                chunk = count;
                        else
                                chunk = iv->iov_len;
                        /* clear_user() returns the number of bytes it could
                         * NOT zero; stop at the first partial segment. */
                        notzeroed = clear_user(iv->iov_base, chunk);
                        sum += (chunk - notzeroed);
                        count -= (chunk - notzeroed);
                        if (notzeroed || !count)
                                break;
                }
                *ppos += sum;
                if (!sum)
                        RETURN(-EFAULT);
                RETURN(sum);
        }

repeat:
        if (sbi->ll_max_rw_chunk != 0) {
                /* first, let's know the end of the current stripe */
                end = *ppos;
                obd_extent_calc(sbi->ll_osc_exp, lsm, OBD_CALC_STRIPE_END,
                                (obd_off *)&end);

                /* correct, the end is beyond the request */
                if (end > *ppos + count - 1)
                        end = *ppos + count - 1;

                /* and chunk shouldn't be too large even if striping is wide */
                if (end - *ppos > sbi->ll_max_rw_chunk)
                        end = *ppos + sbi->ll_max_rw_chunk - 1;

                chunk = end - *ppos + 1;
                if ((count == chunk) && (iov_offset == 0)) {
                        /* Last (or only) chunk covers the remaining request
                         * exactly: use the caller's iovec directly. */
                        if (iov_copy)
                                OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);

                        iov_copy = (struct iovec *)iov;
                        nrsegs_copy = nr_segs;
                } else {
                        if (!iov_copy) {
                                nrsegs_orig = nr_segs;
                                OBD_ALLOC(iov_copy, sizeof(*iov) * nr_segs);
                                if (!iov_copy)
                                        GOTO(out, retval = -ENOMEM);
                        }
                        iov_copy_update(&nr_segs, &iov, &nrsegs_copy, iov_copy,
                                        &iov_offset, chunk);
                }
        } else {
                /* Chunking disabled: lock and read the whole request. */
                end = *ppos + count - 1;
                iov_copy = (struct iovec *)iov;
                nrsegs_copy = nr_segs;
        }

        tree_locked = ll_file_get_tree_lock_iov(&tree, file, iov_copy,
                                                nrsegs_copy, *ppos, end, READ);
        if (tree_locked < 0)
                GOTO(out, retval = tree_locked);

        ll_inode_size_lock(inode, 1);
        /*
         * Consistency guarantees: following possibilities exist for the
         * relation between region being read and real file size at this
         * moment:
         *
         *  (A): the region is completely inside of the file;
         *
         *  (B-x): x bytes of region are inside of the file, the rest is
         *  outside;
         *
         *  (C): the region is completely outside of the file.
         *
         * This classification is stable under DLM lock acquired by
         * ll_tree_lock() above, because to change class, other client has to
         * take DLM lock conflicting with our lock. Also, any updates to
         * ->i_size by other threads on this client are serialized by
         * ll_inode_size_lock(). This guarantees that short reads are handled
         * correctly in the face of concurrent writes and truncates.
         */
        inode_init_lvb(inode, &lvb);
        obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1);
        kms = lvb.lvb_size;
        if (*ppos + count - 1 > kms) {
                /* A glimpse is necessary to determine whether we return a
                 * short read (B) or some zeroes at the end of the buffer
                 * (C) */
                ll_inode_size_unlock(inode, 1);
                retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
                if (retval) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -