aops.c
		if (!IS_ERR(handle))
			ocfs2_commit_trans(osb, handle);
		handle = ERR_PTR(ret);
	}

	return handle;
}

static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
{
	sector_t status;
	u64 p_blkno = 0;
	int err = 0;
	struct inode *inode = mapping->host;

	mlog_entry("(block = %llu)\n", (unsigned long long)block);

	/* We don't need to lock journal system files, since they aren't
	 * accessed concurrently from multiple nodes.
	 */
	if (!INODE_JOURNAL(inode)) {
		err = ocfs2_inode_lock(inode, NULL, 0);
		if (err) {
			if (err != -ENOENT)
				mlog_errno(err);
			goto bail;
		}
		down_read(&OCFS2_I(inode)->ip_alloc_sem);
	}

	if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
		err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno,
						  NULL, NULL);

	if (!INODE_JOURNAL(inode)) {
		up_read(&OCFS2_I(inode)->ip_alloc_sem);
		ocfs2_inode_unlock(inode, 0);
	}

	if (err) {
		mlog(ML_ERROR, "get_blocks() failed, block = %llu\n",
		     (unsigned long long)block);
		mlog_errno(err);
		goto bail;
	}

bail:
	status = err ? 0 : p_blkno;

	mlog_exit((int)status);
	return status;
}
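/*
 * Illustrative note (not part of aops.c): ->bmap is reached from userspace
 * through the FIBMAP ioctl, and the jbd layer maps journal blocks through
 * bmap() on the journal inode, which is why the INODE_JOURNAL case above can
 * skip cluster locking.  A minimal userspace probe of this path might look
 * like the hypothetical sketch below (assumes CAP_SYS_RAWIO and an open fd
 * on an ocfs2 file):
 *
 *	int blk = 0;				// logical block 0
 *	if (ioctl(fd, FIBMAP, &blk) == 0)	// asks ->bmap for the mapping
 *		printf("block 0 maps to physical block %d\n", blk);
 */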
/*
 * TODO: Make this into a generic get_blocks function.
 *
 * From do_direct_io in direct-io.c:
 *  "So what we do is to permit the ->get_blocks function to populate
 *   bh.b_size with the size of IO which is permitted at this offset and
 *   this i_blkbits."
 *
 * This function is called directly from get_more_blocks in direct-io.c.
 *
 * called like this: dio->get_blocks(dio->inode, fs_startblk,
 * 					fs_count, map_bh, dio->rw == WRITE);
 */
#ifdef DIO_OLD_GET_BLOCKS
static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
				      unsigned long max_blocks,
				      struct buffer_head *bh_result, int create)
{
#else
static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
				      struct buffer_head *bh_result, int create)
{
	unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
#endif
	int ret;
	u64 p_blkno, inode_blocks, contig_blocks;
	unsigned int ext_flags;
	unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;

	/* This function won't even be called if the request isn't all
	 * nicely aligned and of the right size, so there's no need
	 * for us to check any of that. */

	inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));

	/*
	 * Any write past EOF is not allowed because we'd be extending.
	 */
	if (create && (iblock + max_blocks) > inode_blocks) {
		ret = -EIO;
		goto bail;
	}

	/* This figures out the size of the next contiguous block, and
	 * our logical offset */
	ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
					  &contig_blocks, &ext_flags);
	if (ret) {
		mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
		     (unsigned long long)iblock);
		ret = -EIO;
		goto bail;
	}

	if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno) {
		ocfs2_error(inode->i_sb,
			    "Inode %llu has a hole at block %llu\n",
			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
			    (unsigned long long)iblock);
		ret = -EROFS;
		goto bail;
	}

	/*
	 * get_more_blocks() expects us to describe a hole by clearing
	 * the mapped bit on bh_result().
	 *
	 * Consider an unwritten extent as a hole.
	 */
	if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
		map_bh(bh_result, inode->i_sb, p_blkno);
	else {
		/*
		 * ocfs2_prepare_inode_for_write() should have caught
		 * the case where we'd be filling a hole and triggered
		 * a buffered write instead.
		 */
		if (create) {
			ret = -EIO;
			mlog_errno(ret);
			goto bail;
		}

		clear_buffer_mapped(bh_result);
	}

	/* make sure we don't map more than max_blocks blocks here as
	   that's all the kernel will handle at this point. */
	if (max_blocks < contig_blocks)
		contig_blocks = max_blocks;
	bh_result->b_size = contig_blocks << blocksize_bits;
bail:
	return ret;
}

/*
 * ocfs2_dio_end_io is called by the dio core when a dio is finished.  We're
 * particularly interested in the aio/dio case.  Like the core uses
 * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from
 * truncation on another.
 */
static void ocfs2_dio_end_io(struct kiocb *iocb,
			     loff_t offset,
			     ssize_t bytes,
			     void *private)
{
	struct inode *inode = filp_dentry(iocb->ki_filp)->d_inode;
	int level;

	/* this io's submitter should not have unlocked this before we could */
	BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));

	ocfs2_iocb_clear_rw_locked(iocb);

	level = ocfs2_iocb_rw_locked_level(iocb);
	if (!level)
		up_read(&inode->i_alloc_sem);
	ocfs2_rw_unlock(inode, level);
}

/*
 * ocfs2_invalidatepage() and ocfs2_releasepage() are shamelessly stolen
 * from ext3.  PageChecked() bits have been removed as OCFS2 does not
 * do journalled data.
 */
#ifdef INVALIDATEPAGE_RETURNS_INT
static int ocfs2_invalidatepage(struct page *page, unsigned long offset)
#else
static void ocfs2_invalidatepage(struct page *page, unsigned long offset)
#endif
{
	journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal;

	journal_invalidatepage(journal, page, offset);
#ifdef INVALIDATEPAGE_RETURNS_INT
	return 0;
#endif
}

static int ocfs2_releasepage(struct page *page, gfp_t wait)
{
	journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal;

	if (!page_has_buffers(page))
		return 0;
	return journal_try_to_free_buffers(journal, page, wait);
}

static ssize_t ocfs2_direct_IO(int rw,
			       struct kiocb *iocb,
			       const struct iovec *iov,
			       loff_t offset,
			       unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = filp_dentry(file)->d_inode->i_mapping->host;
	int ret;

	mlog_entry_void();

	/*
	 * Fallback to buffered I/O if we see an inode without
	 * extents.
	 */
	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
		return 0;

	ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
					    inode->i_sb->s_bdev, iov, offset,
					    nr_segs,
					    ocfs2_direct_IO_get_blocks,
					    ocfs2_dio_end_io);

	mlog_exit(ret);
	return ret;
}

static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb,
					    u32 cpos,
					    unsigned int *start,
					    unsigned int *end)
{
	unsigned int cluster_start = 0, cluster_end = PAGE_CACHE_SIZE;

	if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) {
		unsigned int cpp;

		cpp = 1 << (PAGE_CACHE_SHIFT - osb->s_clustersize_bits);

		cluster_start = cpos % cpp;
		cluster_start = cluster_start << osb->s_clustersize_bits;

		cluster_end = cluster_start + osb->s_clustersize;
	}

	BUG_ON(cluster_start > PAGE_SIZE);
	BUG_ON(cluster_end > PAGE_SIZE);

	if (start)
		*start = cluster_start;
	if (end)
		*end = cluster_end;
}
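/*
 * Worked example (not part of aops.c): the PAGE_CACHE_SHIFT >
 * s_clustersize_bits branch above only matters when pages are larger than
 * clusters.  Assuming 64KB pages (PAGE_CACHE_SHIFT = 16) and 4KB clusters
 * (s_clustersize_bits = 12):
 *
 *	cpp = 1 << (16 - 12) = 16 clusters per page
 *	cpos = 21  ->  cluster_start = (21 % 16) << 12 = 5 * 4096 = 20480
 *	               cluster_end   = 20480 + 4096             = 24576
 *
 * With the common 4KB page / >= 4KB cluster setup the branch is skipped and
 * the boundaries are simply [0, PAGE_CACHE_SIZE).
 */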
/*
 * 'from' and 'to' are the region in the page to avoid zeroing.
 *
 * If pagesize > clustersize, this function will avoid zeroing outside
 * of the cluster boundary.
 *
 * from == to == 0 is code for "zero the entire cluster region"
 */
static void ocfs2_clear_page_regions(struct page *page,
				     struct ocfs2_super *osb, u32 cpos,
				     unsigned from, unsigned to)
{
	void *kaddr;
	unsigned int cluster_start, cluster_end;

	ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end);

	kaddr = kmap_atomic(page, KM_USER0);

	if (from || to) {
		if (from > cluster_start)
			memset(kaddr + cluster_start, 0, from - cluster_start);
		if (to < cluster_end)
			memset(kaddr + to, 0, cluster_end - to);
	} else {
		memset(kaddr + cluster_start, 0, cluster_end - cluster_start);
	}

	kunmap_atomic(kaddr, KM_USER0);
}

/*
 * Nonsparse file systems fully allocate before we get to the write
 * code. This prevents ocfs2_write() from tagging the write as an
 * allocating one, which means ocfs2_map_page_blocks() might try to
 * read-in the blocks at the tail of our file. Avoid reading them by
 * testing i_size against each block offset.
 */
static int ocfs2_should_read_blk(struct inode *inode, struct page *page,
				 unsigned int block_start)
{
	u64 offset = page_offset(page) + block_start;

	if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
		return 1;

	if (i_size_read(inode) > offset)
		return 1;

	return 0;
}

/*
 * Some of this taken from block_prepare_write(). We already have our
 * mapping by now though, and the entire write will be allocating or
 * it won't, so not much need to use BH_New.
 *
 * This will also skip zeroing, which is handled externally.
 */
int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
			  struct inode *inode, unsigned int from,
			  unsigned int to, int new)
{
	int ret = 0;
	struct buffer_head *head, *bh, *wait[2], **wait_bh = wait;
	unsigned int block_end, block_start;
	unsigned int bsize = 1 << inode->i_blkbits;

	if (!page_has_buffers(page))
		create_empty_buffers(page, bsize, 0);

	head = page_buffers(page);
	for (bh = head, block_start = 0; bh != head || !block_start;
	     bh = bh->b_this_page, block_start += bsize) {
		block_end = block_start + bsize;

		clear_buffer_new(bh);

		/*
		 * Ignore blocks outside of our i/o range -
		 * they may belong to unallocated clusters.
		 */
		if (block_start >= to || block_end <= from) {
			if (PageUptodate(page))
				set_buffer_uptodate(bh);
			continue;
		}

		/*
		 * For an allocating write with cluster size >= page
		 * size, we always write the entire page.
		 */
		if (new)
			set_buffer_new(bh);

		if (!buffer_mapped(bh)) {
			map_bh(bh, inode->i_sb, *p_blkno);
			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
		}

		if (PageUptodate(page)) {
			if (!buffer_uptodate(bh))
				set_buffer_uptodate(bh);
		} else if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
			   !buffer_new(bh) &&
			   ocfs2_should_read_blk(inode, page, block_start) &&
			   (block_start < from || block_end > to)) {
			ll_rw_block(READ, 1, &bh);
			*wait_bh++=bh;
		}

		*p_blkno = *p_blkno + 1;
	}

	/*
	 * If we issued read requests - let them complete.
	 */
	while(wait_bh > wait) {
		wait_on_buffer(*--wait_bh);
		if (!buffer_uptodate(*wait_bh))
			ret = -EIO;
	}

	if (ret == 0 || !new)
		return ret;

	/*
	 * If we get -EIO above, zero out any newly allocated blocks
	 * to avoid exposing stale data.
	 */
	bh = head;
	block_start = 0;
	do {
		block_end = block_start + bsize;
		if (block_end <= from)
			goto next_bh;
		if (block_start >= to)
			break;

		zero_user_page(page, block_start, bh->b_size, KM_USER0);
		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);

next_bh:
		block_start = block_end;
		bh = bh->b_this_page;
	} while (bh != head);

	return ret;
}

#if (PAGE_CACHE_SIZE >= OCFS2_MAX_CLUSTERSIZE)
#define OCFS2_MAX_CTXT_PAGES	1
#else
#define OCFS2_MAX_CTXT_PAGES	(OCFS2_MAX_CLUSTERSIZE / PAGE_CACHE_SIZE)
#endif

#define OCFS2_MAX_CLUSTERS_PER_PAGE	(PAGE_CACHE_SIZE / OCFS2_MIN_CLUSTERSIZE)
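/*
 * Illustrative arithmetic (not part of aops.c), assuming the usual ocfs2
 * on-disk limits of a 4KB minimum and 1MB maximum cluster size:
 *
 *	4KB pages:   OCFS2_MAX_CTXT_PAGES        = 1MB / 4KB  = 256 pages
 *	             OCFS2_MAX_CLUSTERS_PER_PAGE = 4KB / 4KB  = 1 cluster
 *	64KB pages:  OCFS2_MAX_CTXT_PAGES        = 1MB / 64KB = 16 pages
 *	             OCFS2_MAX_CLUSTERS_PER_PAGE = 64KB / 4KB = 16 clusters
 *
 * For any given filesystem either a cluster spans multiple pages or a page
 * spans multiple clusters, never both; these macros are the compile-time
 * worst cases for each direction.
 */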
/*
 * Describe the state of a single cluster to be written to.
 */
struct ocfs2_write_cluster_desc {
	u32		c_cpos;
	u32		c_phys;

	/*
	 * Give this a unique field because c_phys eventually gets
	 * filled.
	 */
	unsigned	c_new;
	unsigned	c_unwritten;
};

static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
{
	return d->c_new || d->c_unwritten;
}

struct ocfs2_write_ctxt {
	/* Logical cluster position / len of write */
	u32				w_cpos;
	u32				w_clen;

	struct ocfs2_write_cluster_desc	w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];

	/*
	 * This is true if page_size > cluster_size.
	 *
	 * It triggers a set of special cases during write which might
	 * have to deal with allocating writes to partial pages.
	 */
	unsigned int			w_large_pages;

	/*
	 * Pages involved in this write.
	 *
	 * w_target_page is the page being written to by the user.
	 *
	 * w_pages is an array of pages which always contains
	 * w_target_page, and in the case of an allocating write with
	 * page_size < cluster size, it will contain zero'd and mapped
	 * pages adjacent to w_target_page which need to be written
	 * out so that future reads from that region will get zeros.
	 */
	struct page			*w_pages[OCFS2_MAX_CTXT_PAGES];
	unsigned int			w_num_pages;
	struct page			*w_target_page;

	/*
	 * ocfs2_write_end() uses this to know what the real range to
	 * write in the target should be.
	 */
	unsigned int			w_target_from;
	unsigned int			w_target_to;

	/*
	 * We could use journal_current_handle() but this is cleaner,
	 * IMHO -Mark
	 */
	handle_t			*w_handle;

	struct buffer_head		*w_di_bh;

	struct ocfs2_cached_dealloc_ctxt w_dealloc;
};

void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
{
	int i;

	for(i = 0; i < num_pages; i++) {
		if (pages[i]) {
			unlock_page(pages[i]);
			mark_page_accessed(pages[i]);
			page_cache_release(pages[i]);
		}
	}
}
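/*
 * Illustrative only (hypothetical helper, not part of aops.c): a caller that
 * pins a run of pages with find_or_create_page() is expected to hand the
 * locked pages back through ocfs2_unlock_and_free_pages(), which drops both
 * the page lock and the reference taken at grab time.  Roughly:
 *
 *	static int example_grab_pages(struct address_space *mapping,
 *				      pgoff_t start, int nr,
 *				      struct page **pages)
 *	{
 *		int i;
 *
 *		for (i = 0; i < nr; i++) {
 *			pages[i] = find_or_create_page(mapping, start + i,
 *						       GFP_NOFS);
 *			if (!pages[i]) {
 *				// release whatever was grabbed so far
 *				ocfs2_unlock_and_free_pages(pages, i);
 *				return -ENOMEM;
 *			}
 *		}
 *		return 0;
 *	}
 */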