📄 file.c
字号:
/*
 * NOTE(review): this excerpt is truncated on both ends.  It begins inside
 * the tail of the llite read path (ll_file_readv()/ll_file_aio_read(); the
 * function's opening lines, where tree, inode, kms, ra, bead, iov_copy,
 * sum etc. are declared, are above this excerpt) and ends mid-statement
 * inside ll_file_sendfile().
 */
                        ll_tree_unlock(&tree);
                        goto out;
                }
        } else {
                /* region is within kms and, hence, within real file size (A).
                 * We need to increase i_size to cover the read region so that
                 * generic_file_read() will do its job, but that doesn't mean
                 * the kms size is _correct_, it is only the _minimum_ size.
                 * If someone does a stat they will get the correct size which
                 * will always be >= the kms value here. b=11081 */
                if (i_size_read(inode) < kms)
                        i_size_write(inode, kms);
                ll_inode_size_unlock(inode, 1);
        }

        chunk = end - *ppos + 1;
        CDEBUG(D_INODE,"Read ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
               inode->i_ino, chunk, *ppos, i_size_read(inode));

        /* turn off the kernel's read-ahead */
        if (tree_locked) {
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
                file->f_ramax = 0;
#else
                file->f_ra.ra_pages = 0;
#endif
                /* initialize read-ahead window once per syscall */
                if (ra == 0) {
                        ra = 1;
                        bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
                        bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >>
                                         CFS_PAGE_SHIFT;
                        ll_ra_read_in(file, &bead);
                }
                /* BUG: 5972 */
                file_accessed(file);
#ifdef HAVE_FILE_READV
                retval = generic_file_readv(file, iov_copy, nrsegs_copy, ppos);
#else
                retval = generic_file_aio_read(iocb, iov_copy, nrsegs_copy,
                                               *ppos);
#endif
                ll_tree_unlock(&tree);
        } else {
                /* extent lock not taken (contended file): fall back to the
                 * lockless server-side-lock I/O path */
                retval = ll_file_lockless_io(file, iov_copy, nrsegs_copy, ppos,
                                             READ, chunk);
        }
        ll_rw_stats_tally(sbi, current->pid, file, count, 0);
        if (retval > 0) {
                count -= retval;
                sum += retval;
                /* a full chunk was consumed and the caller asked for more:
                 * lock and read the next chunk */
                if (retval == chunk && count > 0)
                        goto repeat;
        }

out:
        /* tear down the read-ahead window if we installed one above */
        if (ra != 0)
                ll_ra_read_ex(file, &bead);
        /* prefer reporting bytes already read over a late error */
        retval = (sum > 0) ? sum : retval;
        if (iov_copy && iov_copy != iov)
                OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);

        RETURN(retval);
}

/*
 * Read from a file (through the page cache): plain read(2) entry point.
 * Wraps the user buffer in a single iovec and dispatches to the vectored
 * read path (or the AIO read path on kernels without f_op->readv).
 */
static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
                            loff_t *ppos)
{
        struct iovec local_iov = { .iov_base = (void __user *)buf,
                                   .iov_len = count };
#ifdef HAVE_FILE_READV
        return ll_file_readv(file, &local_iov, 1, ppos);
#else
        struct kiocb kiocb;
        ssize_t ret;

        init_sync_kiocb(&kiocb, file);
        kiocb.ki_pos = *ppos;
        kiocb.ki_left = count;

        ret = ll_file_aio_read(&kiocb, &local_iov, 1, kiocb.ki_pos);
        /* propagate the position advanced by the aio path back to read(2) */
        *ppos = kiocb.ki_pos;
        return ret;
#endif
}

/*
 * Write to a file (through the page cache).
 *
 * Serializes writers on lli_write_sem, takes a DLM extent lock over the
 * region being written ([0, EOF] for O_APPEND), and dispatches to the
 * generic page-cache write.  When sbi->ll_max_rw_chunk is set the request
 * is split into stripe-aligned chunks, looping via the repeat: label with
 * the iovec sliced per-chunk through iov_copy_update().
 */
#ifdef HAVE_FILE_WRITEV
static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
                              unsigned long nr_segs, loff_t *ppos)
{
#else /* AIO stuff */
static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                                 unsigned long nr_segs, loff_t pos)
{
        struct file *file = iocb->ki_filp;
        loff_t *ppos = &iocb->ki_pos;
#endif
        struct inode *inode = file->f_dentry->d_inode;
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
        struct ll_lock_tree tree;
        loff_t maxbytes = ll_file_maxbytes(inode);
        loff_t lock_start, lock_end, end;
        ssize_t retval, chunk, sum = 0;
        int tree_locked;
        struct iovec *iov_copy = NULL;
        unsigned long nrsegs_copy, nrsegs_orig = 0;
        size_t count, iov_offset = 0;
        ENTRY;

        count = ll_file_get_iov_count(iov, &nr_segs);
        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
               inode->i_ino, inode->i_generation, inode, count, *ppos);

        SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */

        /* POSIX, but surprised the VFS doesn't check this already */
        if (count == 0)
                RETURN(0);

        /* If file was opened for LL_IOC_LOV_SETSTRIPE but the ioctl wasn't
         * called on the file, don't fail the below assertion (bug 2388). */
        if (file->f_flags & O_LOV_DELAY_CREATE &&
            ll_i2info(inode)->lli_smd == NULL)
                RETURN(-EBADF);

        LASSERT(ll_i2info(inode)->lli_smd != NULL);

        down(&ll_i2info(inode)->lli_write_sem);

repeat:
        chunk = 0; /* just to fix gcc's warning */
        end = *ppos + count - 1;

        if (file->f_flags & O_APPEND) {
                /* append must lock the whole object so i_size is stable */
                lock_start = 0;
                lock_end = OBD_OBJECT_EOF;
                iov_copy = (struct iovec *)iov;
                nrsegs_copy = nr_segs;
        } else if (sbi->ll_max_rw_chunk != 0) {
                /* first, let's know the end of the current stripe */
                end = *ppos;
                obd_extent_calc(sbi->ll_osc_exp, lsm, OBD_CALC_STRIPE_END,
                                (obd_off *)&end);
                /* correct, the end is beyond the request */
                if (end > *ppos + count - 1)
                        end = *ppos + count - 1;
                /* and chunk shouldn't be too large even if striping is wide */
                if (end - *ppos > sbi->ll_max_rw_chunk)
                        end = *ppos + sbi->ll_max_rw_chunk - 1;
                lock_start = *ppos;
                lock_end = end;
                chunk = end - *ppos + 1;
                if ((count == chunk) && (iov_offset == 0)) {
                        /* the remainder fits in one chunk: use the caller's
                         * iovec directly, dropping any earlier slice buffer */
                        if (iov_copy)
                                OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);

                        iov_copy = (struct iovec *)iov;
                        nrsegs_copy = nr_segs;
                } else {
                        /* carve this chunk's slice out of the caller's iovec */
                        if (!iov_copy) {
                                nrsegs_orig = nr_segs;
                                OBD_ALLOC(iov_copy, sizeof(*iov) * nr_segs);
                                if (!iov_copy)
                                        GOTO(out, retval = -ENOMEM);
                        }
                        iov_copy_update(&nr_segs, &iov, &nrsegs_copy, iov_copy,
                                        &iov_offset, chunk);
                }
        } else {
                lock_start = *ppos;
                lock_end = end;
                iov_copy = (struct iovec *)iov;
                nrsegs_copy = nr_segs;
        }

        tree_locked = ll_file_get_tree_lock_iov(&tree, file, iov_copy,
                                                nrsegs_copy, lock_start,
                                                lock_end, WRITE);
        if (tree_locked < 0)
                GOTO(out, retval = tree_locked);

        /* This is ok, g_f_w will overwrite this under i_sem if it races
         * with a local truncate, it just makes our maxbyte checking easier.
         * The i_size value gets updated in ll_extent_lock() as a consequence
         * of the [0,EOF] extent lock we requested above. */
        if (file->f_flags & O_APPEND) {
                *ppos = i_size_read(inode);
                end = *ppos + count - 1;
        }

        if (*ppos >= maxbytes) {
                send_sig(SIGXFSZ, current, 0);
                GOTO(out_unlock, retval = -EFBIG);
        }
        if (end > maxbytes - 1)
                end = maxbytes - 1;

        /* generic_file_write handles O_APPEND after getting i_mutex */
        chunk = end - *ppos + 1;
        CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n",
               inode->i_ino, chunk, *ppos);
        if (tree_locked)
#ifdef HAVE_FILE_WRITEV
                retval = generic_file_writev(file, iov_copy, nrsegs_copy,
                                             ppos);
#else
                retval = generic_file_aio_write(iocb, iov_copy, nrsegs_copy,
                                                *ppos);
#endif
        else
                /* contended file: lockless server-side-lock write path */
                retval = ll_file_lockless_io(file, iov_copy, nrsegs_copy,
                                             ppos, WRITE, chunk);
        ll_rw_stats_tally(ll_i2sbi(inode), current->pid, file, chunk, 1);

out_unlock:
        if (tree_locked)
                ll_tree_unlock(&tree);

out:
        if (retval > 0) {
                count -= retval;
                sum += retval;
                /* whole chunk written and more remains: lock and write the
                 * next chunk */
                if (retval == chunk && count > 0)
                        goto repeat;
        }

        up(&ll_i2info(inode)->lli_write_sem);

        if (iov_copy && iov_copy != iov)
                OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);

        /* prefer reporting bytes already written over a late error */
        retval = (sum > 0) ? sum : retval;
        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_WRITE_BYTES,
                           retval > 0 ? retval : 0);
        RETURN(retval);
}

/*
 * Write to a file: plain write(2) entry point.  Wraps the user buffer in a
 * single iovec and dispatches to the vectored write path (or the AIO write
 * path on kernels without f_op->writev).
 */
static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
                             loff_t *ppos)
{
        struct iovec local_iov = { .iov_base = (void __user *)buf,
                                   .iov_len = count };
#ifdef HAVE_FILE_WRITEV
        return ll_file_writev(file, &local_iov, 1, ppos);
#else
        struct kiocb kiocb;
        ssize_t ret;

        init_sync_kiocb(&kiocb, file);
        kiocb.ki_pos = *ppos;
        kiocb.ki_left = count;

        ret = ll_file_aio_write(&kiocb, &local_iov, 1, kiocb.ki_pos);
        /* propagate the position advanced by the aio path back to write(2) */
        *ppos = kiocb.ki_pos;
        return ret;
#endif
}

/*
 * Send file content (through pagecache) somewhere with helper
 *
 * NOTE(review): this function is truncated at the end of the excerpt (it
 * stops mid-statement below); the remainder is outside this view.
 */
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,
                                size_t count, read_actor_t actor, void *target)
{
        struct inode *inode = in_file->f_dentry->d_inode;
        struct ll_inode_info *lli = ll_i2info(inode);
        struct lov_stripe_md *lsm = lli->lli_smd;
        struct ll_lock_tree tree;
        struct ll_lock_tree_node *node;
        struct ost_lvb lvb;
        struct ll_ra_read bead;
        int rc;
        ssize_t retval;
        __u64 kms;
        ENTRY;

        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
               inode->i_ino, inode->i_generation, inode, count, *ppos);

        /* "If nbyte is 0, read() will return 0 and have no other results."
         * -- Single Unix Spec */
        if (count == 0)
                RETURN(0);

        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_READ_BYTES, count);
        /* turn off the kernel's read-ahead */
        in_file->f_ra.ra_pages = 0;
        /* File with no objects, nothing to lock */
        if (!lsm)
                RETURN(generic_file_sendfile(in_file, ppos, count, actor,
                                             target));

        node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR);
        if (IS_ERR(node))
                RETURN(PTR_ERR(node));

        tree.lt_fd = LUSTRE_FPRIVATE(in_file);
        rc = ll_tree_lock(&tree, node, NULL, count,
                          in_file->f_flags & O_NONBLOCK ?
                          LDLM_FL_BLOCK_NOWAIT : 0);
        if (rc != 0)
                RETURN(rc);

        ll_clear_file_contended(inode);
        ll_inode_size_lock(inode, 1);
        /*
         * Consistency guarantees: following possibilities exist for the
         * relation between region being read and real file size at this
         * moment:
         *
         * (A): the region is completely inside of the file;
         *
         * (B-x): x bytes of region are inside of the file, the rest is
         * outside;
         *
         * (C): the region is completely outside of the file.
         *
         * This classification is stable under DLM lock acquired by
         * ll_tree_lock() above, because to change class, other client has to
         * take DLM lock conflicting with our lock. Also, any updates to
         * ->i_size by other threads on this client are serialized by
         * ll_inode_size_lock(). This guarantees that short reads are handled
         * correctly in the face of concurrent writes and truncates.
         */
        inode_init_lvb(inode, &lvb);
        obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1);
        kms = lvb.lvb_size;
        if (*ppos + count - 1 > kms) {
                /* A glimpse is necessary to determine whether we return a
                 * short read (B) or some zeroes at the end of the buffer (C) */
                ll_inode_size_unlock(inode, 1);
                retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
                if (retval)
                        goto out;
        } else {
                /* region is within kms and, hence, within real file size (A) */
                i_size_write(inode, kms);
                ll_inode_size_unlock(inode, 1);
        }

        CDEBUG(D_INFO, "Send
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -