📄 write.c
字号:
/*
 * linux/fs/nfs/write.c
 *
 * Writing file data over NFS.
 *
 * We do it like this: When a (user) process wishes to write data to an
 * NFS file, a write request is allocated that contains the RPC task data
 * plus some info on the page to be written, and added to the inode's
 * write chain. If the process writes past the end of the page, an async
 * RPC call to write the page is scheduled immediately; otherwise, the call
 * is delayed for a few seconds.
 *
 * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
 *
 * Write requests are kept on the inode's writeback list. Each entry in
 * that list references the page (portion) to be written. When the
 * cache timeout has expired, the RPC task is woken up, and tries to
 * lock the page. As soon as it manages to do so, the request is moved
 * from the writeback list to the writelock list.
 *
 * Note: we must make sure never to confuse the inode passed in the
 * write_page request with the one in page->inode. As far as I understand
 * it, these are different when doing a swap-out.
 *
 * To understand everything that goes on here and in the NFS read code,
 * one should be aware that a page is locked in exactly one of the following
 * cases:
 *
 *  -	A write request is in progress.
 *  -	A user process is in generic_file_write/nfs_update_page
 *  -	A user process is in generic_file_read
 *
 * Also note that because of the way pages are invalidated in
 * nfs_revalidate_inode, the following assertions hold:
 *
 *  -	If a page is dirty, there will be no read requests (a page will
 *	not be re-read unless invalidated by nfs_revalidate_inode).
 *  -	If the page is not uptodate, there will be no pending write
 *	requests, and no process will be in nfs_update_page.
 *
 * FIXME: Interaction with the vmscan routines is not optimal yet.
 * Either vmscan must be made nfs-savvy, or we need a different page
 * reclaim concept that supports something like FS-independent
 * buffer_heads with a b_ops-> field.
* * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de> */#include <linux/config.h>#include <linux/types.h>#include <linux/slab.h>#include <linux/mm.h>#include <linux/pagemap.h>#include <linux/file.h>#include <linux/mpage.h>#include <linux/writeback.h>#include <linux/sunrpc/clnt.h>#include <linux/nfs_fs.h>#include <linux/nfs_mount.h>#include <linux/nfs_page.h>#include <asm/uaccess.h>#include <linux/smp_lock.h>#include <linux/mempool.h>#include "delegation.h"#define NFSDBG_FACILITY NFSDBG_PAGECACHE#define MIN_POOL_WRITE (32)#define MIN_POOL_COMMIT (4)/* * Local function declarations */static struct nfs_page * nfs_update_request(struct nfs_open_context*, struct inode *, struct page *, unsigned int, unsigned int);static void nfs_writeback_done_partial(struct nfs_write_data *, int);static void nfs_writeback_done_full(struct nfs_write_data *, int);static int nfs_wait_on_write_congestion(struct address_space *, int);static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);static kmem_cache_t *nfs_wdata_cachep;static mempool_t *nfs_wdata_mempool;static mempool_t *nfs_commit_mempool;static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);static __inline__ struct nfs_write_data *nfs_writedata_alloc(void){ struct nfs_write_data *p; p = (struct nfs_write_data *)mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); } return p;}static __inline__ void nfs_writedata_free(struct nfs_write_data *p){ mempool_free(p, nfs_wdata_mempool);}static void nfs_writedata_release(struct rpc_task *task){ struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata; nfs_writedata_free(wdata);}static __inline__ struct nfs_write_data *nfs_commit_alloc(void){ struct nfs_write_data *p; p = (struct nfs_write_data *)mempool_alloc(nfs_commit_mempool, SLAB_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); } return p;}static __inline__ void nfs_commit_free(struct nfs_write_data *p){ mempool_free(p, 
nfs_commit_mempool);}/* Adjust the file length if we're writing beyond the end */static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count){ struct inode *inode = page->mapping->host; loff_t end, i_size = i_size_read(inode); unsigned long end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; if (i_size > 0 && page->index < end_index) return; end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); if (i_size >= end) return; i_size_write(inode, end);}/* We can set the PG_uptodate flag if we see that a write request * covers the full page. */static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count){ loff_t end_offs; if (PageUptodate(page)) return; if (base != 0) return; if (count == PAGE_CACHE_SIZE) { SetPageUptodate(page); return; } end_offs = i_size_read(page->mapping->host) - 1; if (end_offs < 0) return; /* Is this the last page? */ if (page->index != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT)) return; /* This is the last page: set PG_uptodate if we cover the entire * extent of the data, then zero the rest of the page. */ if (count == (unsigned int)(end_offs & (PAGE_CACHE_SIZE - 1)) + 1) { memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count); SetPageUptodate(page); }}/* * Write a page synchronously. * Offset is the data offset within the page. 
*/static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, struct page *page, unsigned int offset, unsigned int count, int how){ unsigned int wsize = NFS_SERVER(inode)->wsize; int result, written = 0; struct nfs_write_data *wdata; wdata = kmalloc(sizeof(*wdata), GFP_NOFS); if (!wdata) return -ENOMEM; memset(wdata, 0, sizeof(*wdata)); wdata->flags = how; wdata->cred = ctx->cred; wdata->inode = inode; wdata->args.fh = NFS_FH(inode); wdata->args.context = ctx; wdata->args.pages = &page; wdata->args.stable = NFS_FILE_SYNC; wdata->args.pgbase = offset; wdata->args.count = wsize; wdata->res.fattr = &wdata->fattr; wdata->res.verf = &wdata->verf; dprintk("NFS: nfs_writepage_sync(%s/%Ld %d@%Ld)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode), count, (long long)(page_offset(page) + offset)); nfs_begin_data_update(inode); do { if (count < wsize) wdata->args.count = count; wdata->args.offset = page_offset(page) + wdata->args.pgbase; result = NFS_PROTO(inode)->write(wdata); if (result < 0) { /* Must mark the page invalid after I/O error */ ClearPageUptodate(page); goto io_error; } if (result < wdata->args.count) printk(KERN_WARNING "NFS: short write, count=%u, result=%d\n", wdata->args.count, result); wdata->args.offset += result; wdata->args.pgbase += result; written += result; count -= result; } while (count); /* Update file length */ nfs_grow_file(page, offset, written); /* Set the PG_uptodate flag? */ nfs_mark_uptodate(page, offset, written); if (PageError(page)) ClearPageError(page);io_error: nfs_end_data_update_defer(inode); kfree(wdata); return written ? written : result;}static int nfs_writepage_async(struct nfs_open_context *ctx, struct inode *inode, struct page *page, unsigned int offset, unsigned int count){ struct nfs_page *req; int status; req = nfs_update_request(ctx, inode, page, offset, count); status = (IS_ERR(req)) ? 
PTR_ERR(req) : 0; if (status < 0) goto out; /* Update file length */ nfs_grow_file(page, offset, count); /* Set the PG_uptodate flag? */ nfs_mark_uptodate(page, offset, count); nfs_unlock_request(req); out: return status;}static int wb_priority(struct writeback_control *wbc){ if (wbc->for_reclaim) return FLUSH_HIGHPRI; if (wbc->for_kupdate) return FLUSH_LOWPRI; return 0;}/* * Write an mmapped page to the server. */int nfs_writepage(struct page *page, struct writeback_control *wbc){ struct nfs_open_context *ctx; struct inode *inode = page->mapping->host; unsigned long end_index; unsigned offset = PAGE_CACHE_SIZE; loff_t i_size = i_size_read(inode); int inode_referenced = 0; int priority = wb_priority(wbc); int err; /* * Note: We need to ensure that we have a reference to the inode * if we are to do asynchronous writes. If not, waiting * in nfs_wait_on_request() may deadlock with clear_inode(). * * If igrab() fails here, then it is in any case safe to * call nfs_wb_page(), since there will be no pending writes. */ if (igrab(inode) != 0) inode_referenced = 1; end_index = i_size >> PAGE_CACHE_SHIFT; /* Ensure we've flushed out any previous writes */ nfs_wb_page_priority(inode, page, priority); /* easy case */ if (page->index < end_index) goto do_it; /* things got complicated... */ offset = i_size & (PAGE_CACHE_SIZE-1); /* OK, are we completely out? 
*/ err = 0; /* potential race with truncate - ignore */ if (page->index >= end_index+1 || !offset) goto out;do_it: ctx = nfs_find_open_context(inode, FMODE_WRITE); if (ctx == NULL) { err = -EBADF; goto out; } lock_kernel(); if (!IS_SYNC(inode) && inode_referenced) { err = nfs_writepage_async(ctx, inode, page, 0, offset); if (err >= 0) { err = 0; if (wbc->for_reclaim) nfs_flush_inode(inode, 0, 0, FLUSH_STABLE); } } else { err = nfs_writepage_sync(ctx, inode, page, 0, offset, priority); if (err >= 0) { if (err != offset) redirty_page_for_writepage(wbc, page); err = 0; } } unlock_kernel(); put_nfs_open_context(ctx);out: unlock_page(page); if (inode_referenced) iput(inode); return err; }/* * Note: causes nfs_update_request() to block on the assumption * that the writeback is generated due to memory pressure. */int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc){ struct backing_dev_info *bdi = mapping->backing_dev_info; struct inode *inode = mapping->host; int err; err = generic_writepages(mapping, wbc); if (err) return err; while (test_and_set_bit(BDI_write_congested, &bdi->state) != 0) { if (wbc->nonblocking) return 0; nfs_wait_on_write_congestion(mapping, 0); } err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc)); if (err < 0) goto out; wbc->nr_to_write -= err; if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) { err = nfs_wait_on_requests(inode, 0, 0); if (err < 0) goto out; } err = nfs_commit_inode(inode, 0, 0, wb_priority(wbc)); if (err > 0) { wbc->nr_to_write -= err; err = 0; }out: clear_bit(BDI_write_congested, &bdi->state); wake_up_all(&nfs_write_congestion); return err;}/* * Insert a write request into an inode */static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req){ struct nfs_inode *nfsi = NFS_I(inode); int error; error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); BUG_ON(error == -EEXIST); if (error) return error; if (!nfsi->npages) { igrab(inode); nfs_begin_data_update(inode); if 
(nfs_have_delegation(inode, FMODE_WRITE)) nfsi->change_attr++; } nfsi->npages++; atomic_inc(&req->wb_count); return 0;}/* * Insert a write request into an inode */static void nfs_inode_remove_request(struct nfs_page *req){ struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); BUG_ON (!NFS_WBACK_BUSY(req)); spin_lock(&nfsi->req_lock); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; if (!nfsi->npages) { spin_unlock(&nfsi->req_lock); nfs_end_data_update_defer(inode); iput(inode); } else spin_unlock(&nfsi->req_lock); nfs_clear_request(req); nfs_release_request(req);}/* * Find a request */static inline struct nfs_page *_nfs_find_request(struct inode *inode, unsigned long index){ struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req; req = (struct nfs_page*)radix_tree_lookup(&nfsi->nfs_page_tree, index); if (req) atomic_inc(&req->wb_count); return req;}static struct nfs_page *nfs_find_request(struct inode *inode, unsigned long index){ struct nfs_page *req; struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&nfsi->req_lock); req = _nfs_find_request(inode, index); spin_unlock(&nfsi->req_lock); return req;}/* * Add a request to the inode's dirty list. */static voidnfs_mark_request_dirty(struct nfs_page *req){ struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&nfsi->req_lock); nfs_list_add_request(req, &nfsi->dirty); nfsi->ndirty++; spin_unlock(&nfsi->req_lock); inc_page_state(nr_dirty); mark_inode_dirty(inode);}/* * Check if a request is dirty */static inline intnfs_dirty_request(struct nfs_page *req){ struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); return !list_empty(&req->wb_list) && req->wb_list_head == &nfsi->dirty;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -