filemap.c
		cli();
		if (!(total_reada > PROFILE_MAXREADCOUNT)) {
			restore_flags(flags);
			return;
		}

		printk("Readahead average: max=%ld, len=%ld, win=%ld, async=%ld%%\n",
			total_ramax/total_reada,
			total_ralen/total_reada,
			total_rawin/total_reada,
			(total_async*100)/total_reada);
#ifdef DEBUG_READAHEAD
		printk("Readahead snapshot: max=%ld, len=%ld, win=%ld, raend=%Ld\n",
			filp->f_ramax, filp->f_ralen, filp->f_rawin, filp->f_raend);
#endif

		total_reada	= 0;
		total_async	= 0;
		total_ramax	= 0;
		total_ralen	= 0;
		total_rawin	= 0;

		restore_flags(flags);
	}
}
#endif  /* defined PROFILE_READAHEAD */

/*
 * Read-ahead context:
 * -------------------
 * The read-ahead context fields of the "struct file" are the following:
 * - f_raend : position of the first byte after the last page we tried to
 *	       read ahead.
 * - f_ramax : current read-ahead maximum size.
 * - f_ralen : length of the current IO read block we tried to read ahead.
 * - f_rawin : length of the current read-ahead window.
 *		if the last read-ahead was synchronous then
 *			f_rawin = f_ralen
 *		otherwise (it was asynchronous)
 *			f_rawin = previous value of f_ralen + f_ralen
 *
 * Read-ahead limits:
 * ------------------
 * MIN_READAHEAD   : minimum read-ahead size when reading ahead.
 * MAX_READAHEAD   : maximum read-ahead size when reading ahead.
 *
 * Synchronous read-ahead benefits:
 * --------------------------------
 * Using a reasonable IO transfer length from peripheral devices increases
 * system performance.
 * Reasonable means, in this context, not too large but not too small.
 * The actual maximum value is:
 *	MAX_READAHEAD + PAGE_CACHE_SIZE = 76k if CONFIG_READA_SMALL is undefined
 *	and 32K if it is defined (4K page size assumed).
 *
 * Asynchronous read-ahead benefits:
 * ---------------------------------
 * Overlapping the next read request with user process execution increases
 * system performance.
 *
 * Read-ahead risks:
 * -----------------
 * We have to guess which further data are needed by the user process.
 * If these data are often not really needed, it's bad for system
 * performance.
 * However, we know that files are often accessed sequentially by
 * application programs, so it seems possible to have a reasonably good
 * strategy for that guessing.
 * We only try to read ahead files that seem to be read sequentially.
 *
 * Asynchronous read-ahead risks:
 * ------------------------------
 * In order to maximize overlapping, we must start some asynchronous read
 * requests from the device as soon as possible.
 * We must be very careful about:
 * - The number of effective pending IO read requests.
 *   ONE seems to be the only reasonable value.
 * - The total memory pool usage for the file access stream.
 *   This maximum memory usage is implicitly 2 IO read chunks:
 *   2*(MAX_READAHEAD + PAGE_CACHE_SIZE) = 156K if CONFIG_READA_SMALL is undefined,
 *   64k if it is defined (4K page size assumed).
 */

static inline int get_max_readahead(struct inode * inode)
{
	if (!inode->i_dev || !max_readahead[MAJOR(inode->i_dev)])
		return vm_max_readahead;
	return max_readahead[MAJOR(inode->i_dev)][MINOR(inode->i_dev)];
}
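/*
 * A minimal sketch of the window bookkeeping described in the comment
 * above, written on plain integers so the rule "f_rawin = f_ralen after a
 * synchronous read-ahead, previous f_ralen + f_ralen after an asynchronous
 * one" can be read in isolation.  The struct and function names are
 * hypothetical (not kernel code); the units are page-cache pages, as used
 * by the surrounding code.
 */
struct ra_window_example {
	unsigned long raend;	/* first page index after the last request */
	unsigned long ralen;	/* length (in pages) of the last request   */
	unsigned long rawin;	/* length (in pages) of the current window */
};

static void ra_window_account_example(struct ra_window_example *ra,
				      unsigned long start, unsigned long pages,
				      int async)
{
	/* An asynchronous read-ahead extends the previous window;
	 * a synchronous one starts a new window. */
	ra->rawin = async ? ra->ralen + pages : pages;
	ra->ralen = pages;
	ra->raend = start + pages;
}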
static void generic_file_readahead(int reada_ok,
	struct file * filp, struct inode * inode,
	struct page * page)
{
	unsigned long end_index;
	unsigned long index = page->index;
	unsigned long max_ahead, ahead;
	unsigned long raend;
	int max_readahead = get_max_readahead(inode);

	end_index = inode->i_size >> PAGE_CACHE_SHIFT;

	raend = filp->f_raend;
	max_ahead = 0;

/*
 * The current page is locked.
 * If the current position is inside the previous read IO request, do not
 * try to reread previously read-ahead pages.
 * Otherwise decide whether or not to read ahead some pages synchronously.
 * If we are not going to read ahead, set the read-ahead context for this
 * page only.
 */
	if (PageLocked(page)) {
		if (!filp->f_ralen || index >= raend || index + filp->f_rawin < raend) {
			raend = index;
			if (raend < end_index)
				max_ahead = filp->f_ramax;
			filp->f_rawin = 0;
			filp->f_ralen = 1;
			if (!max_ahead) {
				filp->f_raend  = index + filp->f_ralen;
				filp->f_rawin += filp->f_ralen;
			}
		}
	}
/*
 * The current page is not locked.
 * If we were reading ahead and,
 * if the current max read-ahead size is not zero and,
 * if the current position is inside the last read-ahead IO request,
 * it is the moment to try to read ahead asynchronously.
 * We will later force an unplug of the device in order to force
 * asynchronous read IO.
 */
	else if (reada_ok && filp->f_ramax && raend >= 1 &&
		 index <= raend && index + filp->f_ralen >= raend) {
/*
 * Add ONE page to max_ahead in order to try to have about the same IO max size
 * as synchronous read-ahead, (MAX_READAHEAD + 1)*PAGE_CACHE_SIZE.
 * Compute the position of the last page we have tried to read in order to
 * begin to read ahead just at the next page.
 */
		raend -= 1;
		if (raend < end_index)
			max_ahead = filp->f_ramax + 1;

		if (max_ahead) {
			filp->f_rawin = filp->f_ralen;
			filp->f_ralen = 0;
			reada_ok      = 2;
		}
	}
/*
 * Try to read ahead pages.
 * We hope that ll_rw_blk() plug/unplug, coalescence, request sorting and
 * the scheduler will work well enough for us to avoid too many bad actual
 * IO requests.
 */
	ahead = 0;
	while (ahead < max_ahead) {
		ahead ++;
		if ((raend + ahead) >= end_index)
			break;
		if (page_cache_read(filp, raend + ahead) < 0)
			break;
	}
/*
 * If we tried to read ahead some pages:
 * If we tried to read ahead asynchronously,
 *   try to force an unplug of the device in order to start an asynchronous
 *   read IO request.
 * Update the read-ahead context.
 * Store the length of the current read-ahead window.
 * Double the current max read-ahead size.
 *   That heuristic avoids doing large IO for files that are not really
 *   accessed sequentially.
 */
	if (ahead) {
		filp->f_ralen += ahead;
		filp->f_rawin += filp->f_ralen;
		filp->f_raend = raend + ahead + 1;

		filp->f_ramax += filp->f_ramax;

		if (filp->f_ramax > max_readahead)
			filp->f_ramax = max_readahead;

#ifdef PROFILE_READAHEAD
		profile_readahead((reada_ok == 2), filp);
#endif
	}

	return;
}
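/*
 * A minimal sketch (hypothetical helper, not kernel code) of the growth
 * policy applied at the end of generic_file_readahead() above: every
 * successful read-ahead doubles f_ramax, clamped to the per-device limit
 * returned by get_max_readahead().  For example, starting from 4 pages
 * with a hypothetical limit of 31, successive calls yield 8, 16, 31, 31...
 */
static unsigned long ra_grow_max_example(unsigned long ramax,
					 unsigned long limit)
{
	ramax += ramax;			/* double the current maximum    */
	if (ramax > limit)
		ramax = limit;		/* never exceed the device limit */
	return ramax;
}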
/*
 * Mark a page as having seen activity.
 *
 * If it was already so marked, move it to the active queue and drop
 * the referenced bit.  Otherwise, just mark it for future action..
 */
void mark_page_accessed(struct page *page)
{
	if (!PageActive(page) && PageReferenced(page)) {
		activate_page(page);
		ClearPageReferenced(page);
		return;
	}

	/* Mark the page referenced, AFTER checking for previous usage.. */
	SetPageReferenced(page);
}
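/*
 * A minimal sketch (hypothetical, not kernel code) of the two-step ageing
 * performed by mark_page_accessed() above, modelled on two plain flags:
 * the first touch only sets a referenced bit; a second touch while that
 * bit is still set promotes the page to the active list and clears the
 * bit again.
 */
struct example_page_age {
	int active;
	int referenced;
};

static void example_mark_accessed(struct example_page_age *p)
{
	if (!p->active && p->referenced) {
		p->active = 1;		/* second touch: promote the page */
		p->referenced = 0;	/* and reset the reference bit    */
		return;
	}
	p->referenced = 1;		/* first touch: just mark it seen */
}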
/*
 * This is a generic file read routine, and uses the
 * inode->i_op->readpage() function for the actual low-level stuff.
 *
 * This is really ugly. But the goto's actually try to clarify some
 * of the logic when it comes to error handling etc.
 */
void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor)
{
	struct address_space *mapping = filp->f_dentry->d_inode->i_mapping;
	struct inode *inode = mapping->host;
	unsigned long index, offset;
	struct page *cached_page;
	int reada_ok;
	int error;
	int max_readahead = get_max_readahead(inode);

	cached_page = NULL;
	index = *ppos >> PAGE_CACHE_SHIFT;
	offset = *ppos & ~PAGE_CACHE_MASK;

/*
 * If the current position is outside the previous read-ahead window,
 * we reset the current read-ahead context and set read-ahead max to zero
 * (it will be set to the just-needed value later),
 * otherwise, we assume that the file accesses are sequential enough to
 * continue reading ahead.
 */
	if (index > filp->f_raend || index + filp->f_rawin < filp->f_raend) {
		reada_ok = 0;
		filp->f_raend = 0;
		filp->f_ralen = 0;
		filp->f_ramax = 0;
		filp->f_rawin = 0;
	} else {
		reada_ok = 1;
	}
/*
 * Adjust the current value of read-ahead max.
 * If the read operation stays in the first half page, force no readahead.
 * Otherwise try to increase read-ahead max just enough to do the read request.
 * Then, at least MIN_READAHEAD if read-ahead is ok,
 * and at most MAX_READAHEAD in all cases.
 */
	if (!index && offset + desc->count <= (PAGE_CACHE_SIZE >> 1)) {
		filp->f_ramax = 0;
	} else {
		unsigned long needed;

		needed = ((offset + desc->count) >> PAGE_CACHE_SHIFT) + 1;

		if (filp->f_ramax < needed)
			filp->f_ramax = needed;

		if (reada_ok && filp->f_ramax < vm_min_readahead)
			filp->f_ramax = vm_min_readahead;
		if (filp->f_ramax > max_readahead)
			filp->f_ramax = max_readahead;
	}

	for (;;) {
		struct page *page, **hash;
		unsigned long end_index, nr, ret;

		end_index = inode->i_size >> PAGE_CACHE_SHIFT;

		if (index > end_index)
			break;
		nr = PAGE_CACHE_SIZE;
		if (index == end_index) {
			nr = inode->i_size & ~PAGE_CACHE_MASK;
			if (nr <= offset)
				break;
		}

		nr = nr - offset;

		/*
		 * Try to find the data in the page cache..
		 */
		hash = page_hash(mapping, index);

		spin_lock(&pagecache_lock);
		page = __find_page_nolock(mapping, index, *hash);
		if (!page)
			goto no_cached_page;
found_page:
		page_cache_get(page);
		spin_unlock(&pagecache_lock);

		if (!Page_Uptodate(page))
			goto page_not_up_to_date;
		generic_file_readahead(reada_ok, filp, inode, page);
page_ok:
		/* If users can be writing to this page using arbitrary
		 * virtual addresses, take care about potential aliasing
		 * before reading the page on the kernel side.
		 */
		if (mapping->i_mmap_shared != NULL)
			flush_dcache_page(page);

		/*
		 * Mark the page accessed if we read the beginning
		 * or we just did an lseek.
		 */
		if (!offset || !filp->f_reada)
			mark_page_accessed(page);

		/*
		 * Ok, we have the page, and it's up-to-date, so
		 * now we can copy it to user space...
		 *
		 * The actor routine returns how many bytes were actually used..
		 * NOTE! This may not be the same as how much of a user buffer
		 * we filled up (we may be padding etc), so we can only update
		 * "pos" here (the actor routine has to update the user buffer
		 * pointers and the remaining count).
		 */
		ret = actor(desc, page, offset, nr);
		offset += ret;
		index += offset >> PAGE_CACHE_SHIFT;
		offset &= ~PAGE_CACHE_MASK;

		page_cache_release(page);
		if (ret == nr && desc->count)
			continue;
		break;

/*
 * Ok, the page was not immediately readable, so let's try to read ahead
 * while we're at it..
 */
page_not_up_to_date:
		generic_file_readahead(reada_ok, filp, inode, page);

		if (Page_Uptodate(page))
			goto page_ok;

		/* Get exclusive access to the page ... */
		lock_page(page);

		/* Did it get unhashed before we got the lock? */
		if (!page->mapping) {
			UnlockPage(page);
			page_cache_release(page);
			continue;
		}

		/* Did somebody else fill it already? */
		if (Page_Uptodate(page)) {
			UnlockPage(page);
			goto page_ok;
		}

readpage:
		/* ... and start the actual read. The read will unlock the page. */
		error = mapping->a_ops->readpage(filp, page);

		if (!error) {
			if (Page_Uptodate(page))
				goto page_ok;

			/* Again, try some read-ahead while waiting for the page to finish.. */
			generic_file_readahead(reada_ok, filp, inode, page);
			wait_on_page(page);
			if (Page_Uptodate(page))
				goto page_ok;
			error = -EIO;
		}

		/* UHHUH! A synchronous read error occurred. Report it */
		desc->error = error;
		page_cache_release(page);
		break;

no_cached_page:
		/*
		 * Ok, it wasn't cached, so we need to create a new
		 * page..
		 *
		 * We get here with the page cache lock held.
		 */
		if (!cached_page) {
			spin_unlock(&pagecache_lock);
			cached_page = page_cache_alloc(mapping);
			if (!cached_page) {
				desc->error = -ENOMEM;
				break;
			}

			/*
			 * Somebody may have added the page while we
			 * dropped the page cache lock. Check for that.
			 */
			spin_lock(&pagecache_lock);
			page = __find_page_nolock(mapping, index, *hash);
			if (page)
				goto found_page;
		}

		/*
		 * Ok, add the new page to the hash-queues...
		 */
		page = cached_page;
		__add_to_page_cache(page, mapping, index, hash);
		spin_unlock(&pagecache_lock);
		lru_cache_add(page);
		cached_page = NULL;

		goto readpage;
	}

	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
	filp->f_reada = 1;
	if (cached_page)
		page_cache_release(cached_page);
	UPDATE_ATIME(inode);
}

static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset)
{
	ssize_t retval;
	int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress;
	struct kiobuf * iobuf;
	struct address_space * mapping = filp->f_dentry->d_inode->i_mapping;
	struct inode * inode = mapping->host;

	new_iobuf = 0;
	iobuf = filp->f_iobuf;
	if (test_and_set_bit(0, &filp->f_iobuf_lock)) {
		/*
		 * A parallel read/write is using the preallocated iobuf
		 * so just run slow and allocate a new one.
		 */
		retval = alloc_kiovec(1, &iobuf);
		if (retval)
			goto out;
		new_iobuf = 1;
	}

	blocksize = 1 << inode->i_blkbits;
	blocksize_bits = inode->i_blkbits;
	blocksize_mask = blocksize - 1;
	chunk_size = KIO_MAX_ATOMIC_IO << 10;

	retval = -EINVAL;
	if ((offset & blocksize_mask) || (count & blocksize_mask))
		goto out_free;
	if (!mapping->a_ops->direct_IO)
		goto out_free;

	/*
	 * Flush to disk exclusively the _data_, metadata must remain
	 * completely asynchronous or performance will go to /dev/null.
	 */
	retval = filemap_fdatasync(mapping);
	if (retval == 0)
		retval = fsync_inode_data_buffers(inode);
	if (retval == 0)
		retval = filemap_fdatawait(mapping);
	if (retval < 0)
		goto out_free;

	progress = retval = 0;
	while (count > 0) {
		iosize = count;
		if (iosize > chunk_size)
			iosize = chunk_size;

		retval = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
		if (retval)
			break;

		retval = mapping->a_ops->direct_IO(rw, inode, iobuf,
					(offset+progress) >> blocksize_bits, blocksize);

		if (rw == READ && retval > 0)
			mark_dirty_kiobuf(iobuf, retval);

		if (retval >= 0) {
			count -= retval;
			buf += retval;
			progress += retval;
		}

		unmap_kiobuf(iobuf);

		if (retval != iosize)
			break;
	}

	if (progress)
		retval = progress;

 out_free:
	if (!new_iobuf)
		clear_bit(0, &filp->f_iobuf_lock);
	else
		free_kiovec(1, &iobuf);
 out:
	return retval;
}
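/*
 * A minimal sketch (hypothetical, not part of the original file) of an
 * actor callback for do_generic_file_read() above.  It consumes bytes
 * without copying them anywhere, updating only the remaining count and
 * the "written" total (the latter assumed to exist, mirroring the kernel's
 * file_read_actor()); a real actor also copies the page contents to the
 * user buffer and advances the buffer pointer.
 */
static int example_count_actor(read_descriptor_t * desc, struct page *page,
			       unsigned long offset, unsigned long size)
{
	unsigned long left = desc->count;

	if (size > left)
		size = left;		/* never consume more than requested */
	desc->count = left - size;	/* caller checks this to keep looping */
	desc->written += size;		/* assumed field: total bytes consumed */
	return size;			/* bytes "used", as the caller expects */
}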