xfs_lrw.c
来自「优龙2410linux2.6.8内核源代码」· C语言 代码 · 共 1,029 行 · 第 1/2 页
C
1,029 行
xfs_extlen_t buf_len_fsb; xfs_extlen_t prev_zero_count; xfs_mount_t *mp; int nimaps; int error = 0; xfs_bmbt_irec_t imap; loff_t loff; size_t lsize; ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); mp = io->io_mount; /* * First handle zeroing the block on which isize resides. * We only zero a part of that block so it is handled specially. */ error = xfs_zero_last_block(ip, io, offset, isize, end_size); if (error) { ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); return error; } /* * Calculate the range between the new size and the old * where blocks needing to be zeroed may exist. To get the * block where the last byte in the file currently resides, * we need to subtract one from the size and truncate back * to a block boundary. We subtract 1 in case the size is * exactly on a block boundary. */ last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1); ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb); if (last_fsb == end_zero_fsb) { /* * The size was only incremented on its last block. * We took care of that above, so just return. */ return 0; } ASSERT(start_zero_fsb <= end_zero_fsb); prev_zero_fsb = NULLFILEOFF; prev_zero_count = 0; while (start_zero_fsb <= end_zero_fsb) { nimaps = 1; zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb, 0, NULL, 0, &imap, &nimaps, NULL); if (error) { ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); return error; } ASSERT(nimaps > 0); if (imap.br_state == XFS_EXT_UNWRITTEN || imap.br_startblock == HOLESTARTBLOCK) { /* * This loop handles initializing pages that were * partially initialized by the code below this * loop. It basically zeroes the part of the page * that sits on a hole and sets the page as P_HOLE * and calls remapf if it is a mapped file. */ prev_zero_fsb = NULLFILEOFF; prev_zero_count = 0; start_zero_fsb = imap.br_startoff + imap.br_blockcount; ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); continue; } /* * There are blocks in the range requested. * Zero them a single write at a time. We actually * don't zero the entire range returned if it is * too big and simply loop around to get the rest. * That is not the most efficient thing to do, but it * is simple and this path should not be exercised often. */ buf_len_fsb = XFS_FILBLKS_MIN(imap.br_blockcount, mp->m_writeio_blocks << 8); /* * Drop the inode lock while we're doing the I/O. * We'll still have the iolock to protect us. */ XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); loff = XFS_FSB_TO_B(mp, start_zero_fsb); lsize = XFS_FSB_TO_B(mp, buf_len_fsb); error = xfs_iozero(ip, loff, lsize, end_size); if (error) { goto out_lock; } prev_zero_fsb = start_zero_fsb; prev_zero_count = buf_len_fsb; start_zero_fsb = imap.br_startoff + buf_len_fsb; ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); } return 0;out_lock: XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); ASSERT(error >= 0); return error;}ssize_t /* bytes written, or (-) error */xfs_write( bhv_desc_t *bdp, struct kiocb *iocb, const struct iovec *iovp, unsigned int segs, loff_t *offset, int ioflags, cred_t *credp){ struct file *file = iocb->ki_filp; size_t size = 0; xfs_inode_t *xip; xfs_mount_t *mp; ssize_t ret; int error = 0; xfs_fsize_t isize, new_size; xfs_fsize_t n, limit; xfs_iocore_t *io; vnode_t *vp; unsigned long seg; int iolock; int eventsent = 0; vrwlock_t locktype; XFS_STATS_INC(xs_write_calls); vp = BHV_TO_VNODE(bdp); xip = XFS_BHVTOI(bdp); /* START copy & waste from filemap.c */ for (seg = 0; seg < segs; seg++) { const struct iovec *iv = &iovp[seg]; /* * If any segment has a negative length, or the cumulative * length ever wraps negative then return -EINVAL. */ size += iv->iov_len; if (unlikely((ssize_t)(size|iv->iov_len) < 0)) return XFS_ERROR(-EINVAL); } /* END copy & waste from filemap.c */ if (size == 0) return 0; io = &xip->i_iocore; mp = io->io_mount; if (XFS_FORCED_SHUTDOWN(mp)) { return -EIO; } if (ioflags & IO_ISDIRECT) { xfs_buftarg_t *target = (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((*offset & target->pbr_smask) || (size & target->pbr_smask)) { return XFS_ERROR(-EINVAL); } iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; } else { iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; } xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); isize = xip->i_d.di_size; limit = XFS_MAXIOFFSET(mp); if (file->f_flags & O_APPEND) *offset = isize;start: n = limit - *offset; if (n <= 0) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return -EFBIG; } if (n < size) size = n; new_size = *offset + size; if (new_size > isize) { io->io_new_size = new_size; } if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS) && !eventsent)) { loff_t savedsize = *offset; int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); xfs_iunlock(xip, XFS_ILOCK_EXCL); error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, *offset, size, dmflags, &locktype); if (error) { xfs_iunlock(xip, iolock); return -error; } xfs_ilock(xip, XFS_ILOCK_EXCL); eventsent = 1; /* * The iolock was dropped and reaquired in XFS_SEND_DATA * so we have to recheck the size when appending. * We will only "goto start;" once, since having sent the * event prevents another call to XFS_SEND_DATA, which is * what allows the size to change in the first place. */ if ((file->f_flags & O_APPEND) && savedsize != xip->i_d.di_size) { *offset = isize = xip->i_d.di_size; goto start; } } /* * On Linux, generic_file_write updates the times even if * no data is copied in so long as the write had a size. * * We must update xfs' times since revalidate will overcopy xfs. */ if (size && !(ioflags & IO_INVIS)) xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* * If the offset is beyond the size of the file, we have a couple * of things to do. First, if there is already space allocated * we need to either create holes or zero the disk or ... * * If there is a page where the previous size lands, we need * to zero it out up to the new size. */ if (!(ioflags & IO_ISDIRECT) && (*offset > isize && isize)) { error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset, isize, *offset + size); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return(-error); } } xfs_iunlock(xip, XFS_ILOCK_EXCL); /* * If we're writing the file then make sure to clear the * setuid and setgid bits if the process is not being run * by root. This keeps people from modifying setuid and * setgid binaries. */ if (((xip->i_d.di_mode & S_ISUID) || ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))) && !capable(CAP_FSETID)) { error = xfs_write_clear_setuid(xip); if (error) { xfs_iunlock(xip, iolock); return -error; } }retry: if (ioflags & IO_ISDIRECT) { xfs_inval_cached_pages(vp, io, *offset, 1, 1); xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, iovp, segs, *offset, ioflags); } else { xfs_rw_enter_trace(XFS_WRITE_ENTER, io, iovp, segs, *offset, ioflags); } ret = generic_file_aio_write_nolock(iocb, iovp, segs, offset); if ((ret == -ENOSPC) && DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { xfs_rwunlock(bdp, locktype); error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (error) return -error; xfs_rwlock(bdp, locktype); *offset = xip->i_d.di_size; goto retry; } if (*offset > xip->i_d.di_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); if (*offset > xip->i_d.di_size) { struct inode *inode = LINVFS_GET_IP(vp); xip->i_d.di_size = *offset; i_size_write(inode, *offset); xip->i_update_core = 1; xip->i_update_size = 1; } xfs_iunlock(xip, XFS_ILOCK_EXCL); } if (ret <= 0) { xfs_rwunlock(bdp, locktype); return ret; } XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ if ((file->f_flags & O_SYNC) || IS_SYNC(file->f_dentry->d_inode)) { /* * If we're treating this as O_DSYNC and we have not updated the * size, force the log. */ if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && !(xip->i_update_size)) { /* * If an allocation transaction occurred * without extending the size, then we have to force * the log up the proper point to ensure that the * allocation is permanent. We can't count on * the fact that buffered writes lock out direct I/O * writes - the direct I/O write could have extended * the size nontransactionally, then finished before * we started. xfs_write_file will think that the file * didn't grow but the update isn't safe unless the * size change is logged. * * Force the log if we've committed a transaction * against the inode or if someone else has and * the commit record hasn't gone to disk (e.g. * the inode is pinned). This guarantees that * all changes affecting the inode are permanent * when we return. */ xfs_inode_log_item_t *iip; xfs_lsn_t lsn; iip = xip->i_itemp; if (iip && iip->ili_last_lsn) { lsn = iip->ili_last_lsn; xfs_log_force(mp, lsn, XFS_LOG_FORCE | XFS_LOG_SYNC); } else if (xfs_ipincount(xip) > 0) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); } } else { xfs_trans_t *tp; /* * O_SYNC or O_DSYNC _with_ a size update are handled * the same way. * * If the write was synchronous then we need to make * sure that the inode modification time is permanent. * We'll have updated the timestamp above, so here * we use a synchronous transaction to log the inode. * It's not fast, but it's necessary. * * If this a dsync write and the size got changed * non-transactionally, then we need to ensure that * the size change gets logged in a synchronous * transaction. */ tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); if ((error = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp), 0, 0, 0))) { /* Transaction reserve failed */ xfs_trans_cancel(tp, 0); } else { /* Transaction reserve successful */ xfs_ilock(xip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, xip); xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0, NULL); xfs_iunlock(xip, XFS_ILOCK_EXCL); } } } /* (ioflags & O_SYNC) */ xfs_rwunlock(bdp, locktype); return(ret);}/* * All xfs metadata buffers except log state machine buffers * get this attached as their b_bdstrat callback function. * This is so that we can catch a buffer * after prematurely unpinning it to forcibly shutdown the filesystem. */intxfs_bdstrat_cb(struct xfs_buf *bp){ xfs_mount_t *mp; mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); if (!XFS_FORCED_SHUTDOWN(mp)) { pagebuf_iorequest(bp); return 0; } else { xfs_buftrace("XFS__BDSTRAT IOERROR", bp); /* * Metadata write that didn't get logged but * written delayed anyway. These aren't associated * with a transaction, and can be ignored. */ if (XFS_BUF_IODONE_FUNC(bp) == NULL && (XFS_BUF_ISREAD(bp)) == 0) return (xfs_bioerror_relse(bp)); else return (xfs_bioerror(bp)); }}intxfs_bmap(bhv_desc_t *bdp, xfs_off_t offset, ssize_t count, int flags, xfs_iomap_t *iomapp, int *niomaps){ xfs_inode_t *ip = XFS_BHVTOI(bdp); xfs_iocore_t *io = &ip->i_iocore; ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) == ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0)); return xfs_iomap(io, offset, count, flags, iomapp, niomaps);}/* * Wrapper around bdstrat so that we can stop data * from going to disk in case we are shutting down the filesystem. * Typically user data goes thru this path; one of the exceptions * is the superblock. */intxfsbdstrat( struct xfs_mount *mp, struct xfs_buf *bp){ ASSERT(mp); if (!XFS_FORCED_SHUTDOWN(mp)) { /* Grio redirection would go here * if (XFS_BUF_IS_GRIO(bp)) { */ pagebuf_iorequest(bp); return 0; } xfs_buftrace("XFSBDSTRAT IOERROR", bp); return (xfs_bioerror_relse(bp));}/* * If the underlying (data/log/rt) device is readonly, there are some * operations that cannot proceed. */intxfs_dev_is_read_only( xfs_mount_t *mp, char *message){ if (xfs_readonly_buftarg(mp->m_ddev_targp) || xfs_readonly_buftarg(mp->m_logdev_targp) || (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { cmn_err(CE_NOTE, "XFS: %s required on read-only device.", message); cmn_err(CE_NOTE, "XFS: write access unavailable, cannot proceed."); return EROFS; } return 0;}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?