📄 rw.c
        CDEBUG(D_READA, "ra_end %lu end %lu stride end %lu \n",
               ra_end, end, ria.ria_end);

        if (ra_end != (end + 1)) {
                spin_lock(&ras->ras_lock);
                if (ra_end < ras->ras_next_readahead &&
                    index_in_window(ra_end, ras->ras_window_start, 0,
                                    ras->ras_window_len)) {
                        ras->ras_next_readahead = ra_end;
                        RAS_CDEBUG(ras);
                }
                spin_unlock(&ras->ras_lock);
        }

        RETURN(ret);
}

static void ras_set_start(struct ll_readahead_state *ras, unsigned long index)
{
        ras->ras_window_start = index & (~(RAS_INCREASE_STEP - 1));
}

/* called with the ras_lock held or from places where it doesn't matter */
static void ras_reset(struct ll_readahead_state *ras, unsigned long index)
{
        ras->ras_last_readpage = index;
        ras->ras_consecutive_requests = 0;
        ras->ras_consecutive_pages = 0;
        ras->ras_window_len = 0;
        ras_set_start(ras, index);
        ras->ras_next_readahead = max(ras->ras_window_start, index);

        RAS_CDEBUG(ras);
}

/* called with the ras_lock held or from places where it doesn't matter */
static void ras_stride_reset(struct ll_readahead_state *ras)
{
        ras->ras_consecutive_stride_requests = 0;
        RAS_CDEBUG(ras);
}

void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
{
        spin_lock_init(&ras->ras_lock);
        ras_reset(ras, 0);
        ras->ras_requests = 0;
        INIT_LIST_HEAD(&ras->ras_read_beads);
}

/* Check whether the read request is in the stride window.
 * If it is, return 1; otherwise return 0.
 * Also update stride_gap and stride_pages. */
static int index_in_stride_window(unsigned long index,
                                  struct ll_readahead_state *ras,
                                  struct inode *inode)
{
        int stride_gap = index - ras->ras_last_readpage - 1;

        LASSERT(stride_gap != 0);

        if (ras->ras_consecutive_pages == 0)
                return 0;

        /* Otherwise check the stride by itself */
        if ((ras->ras_stride_length - ras->ras_stride_pages) == stride_gap &&
            ras->ras_consecutive_pages == ras->ras_stride_pages)
                return 1;

        if (stride_gap >= 0) {
                /* Only set stride_pages/stride_length if this is
                 * forward reading (stride_gap > 0). */
                ras->ras_stride_pages = ras->ras_consecutive_pages;
                ras->ras_stride_length = stride_gap +
                                         ras->ras_consecutive_pages;
        } else {
                /* If stride_gap < 0 (backward reading), reset
                 * stride_pages/stride_length.
                 * FIXME: backward stride I/O read. */
                ras->ras_stride_pages = 0;
                ras->ras_stride_length = 0;
        }
        RAS_CDEBUG(ras);

        return 0;
}

static unsigned long
stride_page_count(struct ll_readahead_state *ras, unsigned long len)
{
        return stride_pg_count(ras->ras_stride_offset, ras->ras_stride_length,
                               ras->ras_stride_pages, ras->ras_stride_offset,
                               len);
}

/* The stride read-ahead window will be increased by inc_len according to
 * the stride I/O pattern. */
static void ras_stride_increase_window(struct ll_readahead_state *ras,
                                       struct ll_ra_info *ra,
                                       unsigned long inc_len)
{
        unsigned long left, step, window_len;
        unsigned long stride_len;

        LASSERT(ras->ras_stride_length > 0);

        stride_len = ras->ras_window_start + ras->ras_window_len -
                     ras->ras_stride_offset;

        LASSERTF(stride_len >= 0, "window_start %lu, window_len %lu"
                 " stride_offset %lu\n", ras->ras_window_start,
                 ras->ras_window_len, ras->ras_stride_offset);

        left = stride_len % ras->ras_stride_length;

        window_len = ras->ras_window_len - left;

        if (left < ras->ras_stride_pages)
                left += inc_len;
        else
                left = ras->ras_stride_pages + inc_len;

        LASSERT(ras->ras_stride_pages != 0);

        step = left / ras->ras_stride_pages;
        left %= ras->ras_stride_pages;

        window_len += step * ras->ras_stride_length + left;

        if (stride_page_count(ras, window_len) <= ra->ra_max_pages)
                ras->ras_window_len = window_len;

        RAS_CDEBUG(ras);
}

/* Set the stride I/O read-ahead window start offset */
static void ras_set_stride_offset(struct ll_readahead_state *ras)
{
        unsigned long window_len = ras->ras_next_readahead -
                                   ras->ras_window_start;
        unsigned long left;

        LASSERT(ras->ras_stride_length != 0);

        left = window_len % ras->ras_stride_length;

        ras->ras_stride_offset = ras->ras_next_readahead - left;

        RAS_CDEBUG(ras);
}

static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
                       struct ll_readahead_state *ras, unsigned long index,
                       unsigned hit)
{
        struct ll_ra_info *ra = &sbi->ll_ra_info;
        int zero = 0, stride_zero = 0, stride_detect = 0, ra_miss = 0;
        ENTRY;

        spin_lock(&sbi->ll_lock);
        spin_lock(&ras->ras_lock);

        ll_ra_stats_inc_unlocked(ra, hit ? RA_STAT_HIT : RA_STAT_MISS);

        /* Reset the read-ahead window in two cases.  First when the app seeks
         * or reads to some other part of the file.  Secondly if we get a
         * read-ahead miss that we think we've previously issued.  This can
         * be a symptom of there being so many read-ahead pages that the VM
         * is reclaiming them before we get to them. */
        if (!index_in_window(index, ras->ras_last_readpage, 8, 8)) {
                zero = 1;
                ll_ra_stats_inc_unlocked(ra, RA_STAT_DISTANT_READPAGE);
                /* check whether it is in stride I/O mode */
                if (!index_in_stride_window(index, ras, inode))
                        stride_zero = 1;
        } else if (!hit && ras->ras_window_len &&
                   index < ras->ras_next_readahead &&
                   index_in_window(index, ras->ras_window_start, 0,
                                   ras->ras_window_len)) {
                zero = 1;
                ra_miss = 1;
                /* If we get a read-ahead miss and stride I/O has not been
                 * detected yet, reset the stride state and re-detect the
                 * stride I/O mode from scratch to avoid complication. */
                if (!stride_io_mode(ras))
                        stride_zero = 1;
                ll_ra_stats_inc_unlocked(ra, RA_STAT_MISS_IN_WINDOW);
        }

        /* On the second access to a file smaller than the tunable
         * ra_max_read_ahead_whole_pages trigger RA on all pages in the
         * file up to ra_max_pages.  This is simply a best effort and
         * only occurs once per open file.  Normal RA behavior is reverted
         * to for subsequent IO.  The mmap case does not increment
         * ras_requests and thus can never trigger this behavior. */
        if (ras->ras_requests == 2 && !ras->ras_request_index) {
                __u64 kms_pages;

                kms_pages = (i_size_read(inode) + CFS_PAGE_SIZE - 1) >>
                            CFS_PAGE_SHIFT;

                CDEBUG(D_READA, "kmsp "LPU64" mwp %lu mp %lu\n", kms_pages,
                       ra->ra_max_read_ahead_whole_pages, ra->ra_max_pages);

                if (kms_pages &&
                    kms_pages <= ra->ra_max_read_ahead_whole_pages) {
                        ras->ras_window_start = 0;
                        ras->ras_last_readpage = 0;
                        ras->ras_next_readahead = 0;
                        ras->ras_window_len = min(ra->ra_max_pages,
                                ra->ra_max_read_ahead_whole_pages);
                        GOTO(out_unlock, 0);
                }
        }

        if (zero) {
                /* If this is a discontinuous read, check whether it is
                 * stride I/O. */
                if (stride_zero) {
                        ras_reset(ras, index);
                        ras->ras_consecutive_pages++;
                        ras_stride_reset(ras);
                        RAS_CDEBUG(ras);
                        GOTO(out_unlock, 0);
                } else {
                        /* The read is still in the stride window, or it is
                         * a read-ahead miss. */

                        /* A read-ahead window miss probably means VM
                         * pressure reclaimed some read-ahead pages, so the
                         * window length is not increased, but it is also not
                         * reset, to avoid re-detecting the stride I/O mode. */
                        ras->ras_consecutive_requests = 0;
                        if (!ra_miss) {
                                ras->ras_consecutive_pages = 0;
                                if (++ras->ras_consecutive_stride_requests > 1)
                                        stride_detect = 1;
                        }

                        RAS_CDEBUG(ras);
                }
        } else if (ras->ras_consecutive_stride_requests > 1) {
                /* If this is a contiguous read but we are currently in
                 * stride I/O mode, check whether the stride step is still
                 * valid; if not, reset the stride read-ahead window. */
                if (ras->ras_consecutive_pages + 1 > ras->ras_stride_pages)
                        ras_stride_reset(ras);
        }

        ras->ras_last_readpage = index;
        ras->ras_consecutive_pages++;
        ras_set_start(ras, index);
        ras->ras_next_readahead = max(ras->ras_window_start,
                                      ras->ras_next_readahead);
        RAS_CDEBUG(ras);

        /* Trigger RA in the mmap case where ras_consecutive_requests
         * is not incremented and thus can't be used to trigger RA. */
        if (!ras->ras_window_len && ras->ras_consecutive_pages == 4) {
                ras->ras_window_len = RAS_INCREASE_STEP;
                GOTO(out_unlock, 0);
        }

        /* Initially set the stride window offset to next_readahead. */
        if (ras->ras_consecutive_stride_requests == 2 && stride_detect)
                ras_set_stride_offset(ras);

        /* The initial ras_window_len is set to the request size.  To avoid
         * uselessly reading and discarding pages for random IO the window is
         * only increased once per consecutive request received. */
        if ((ras->ras_consecutive_requests > 1 &&
             !ras->ras_request_index) || stride_detect) {
                if (stride_io_mode(ras))
                        ras_stride_increase_window(ras, ra, RAS_INCREASE_STEP);
                else
                        ras->ras_window_len = min(ras->ras_window_len +
                                                  RAS_INCREASE_STEP,
                                                  ra->ra_max_pages);
        }
        EXIT;
out_unlock:
        RAS_CDEBUG(ras);
        ras->ras_request_index++;
        spin_unlock(&ras->ras_lock);
        spin_unlock(&sbi->ll_lock);
        return;
}

int ll_writepage(struct page *page)
{
        struct inode *inode = page->mapping->host;
        struct ll_inode_info *lli = ll_i2info(inode);
        struct obd_export *exp;
        struct ll_async_page *llap;
        int rc = 0;
        ENTRY;

        LASSERT(PageLocked(page));

        exp = ll_i2obdexp(inode);
        if (exp == NULL)
                GOTO(out, rc = -EINVAL);

        llap = llap_from_page(page, LLAP_ORIGIN_WRITEPAGE);
        if (IS_ERR(llap))
                GOTO(out, rc = PTR_ERR(llap));

        LASSERT(!llap->llap_nocache);
        LASSERT(!PageWriteback(page));
        set_page_writeback(page);

        page_cache_get(page);
        if (llap->llap_write_queued) {
                LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n");
                rc = obd_set_async_flags(exp, lli->lli_smd, NULL,
                                         llap->llap_cookie,
                                         ASYNC_READY | ASYNC_URGENT);
        } else {
                rc = queue_or_sync_write(exp, inode, llap, CFS_PAGE_SIZE,
                                         ASYNC_READY | ASYNC_URGENT);
        }
        if (rc)
                page_cache_release(page);
out:
        if (rc) {
                if (!lli->lli_async_rc)
                        lli->lli_async_rc = rc;
                /* re-dirty the page on error so the write is retried */
                if (PageWriteback(page)) {
                        end_page_writeback(page);
                }
                /* resend the page only if its IO has not started */
                if (!PageError(page))
                        ll_redirty_page(page);
                unlock_page(page);
        }
        RETURN(rc);
}

/*
 * for now we do our readpage the same on both 2.4 and 2.5.  The kernel's
 * read-ahead assumes it is valid to issue readpage all the way up to
 * i_size, but our dlm locks make that not the case.  We disable the
 * kernel's read-ahead and do our own by walking ahead in the page cache
 * checking for dlm lock coverage.  the ma
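A worked illustration of the check in index_in_stride_window() may help: a read at page `index` matches an already detected stride pattern when the gap since the last read page equals stride_length - stride_pages and the current run of consecutive pages equals stride_pages. The user-space sketch below is a simplified model with made-up values, not the kernel's ll_readahead_state.

/* Minimal model of the stride-detection check above (illustrative only). */
#include <stdio.h>

struct stride_model {
        unsigned long last_readpage;     /* last page index that was read */
        unsigned long consecutive_pages; /* pages read back-to-back so far */
        unsigned long stride_pages;      /* data pages per stride chunk */
        unsigned long stride_length;     /* chunk plus gap, in pages */
};

static int index_matches_stride(unsigned long index,
                                const struct stride_model *m)
{
        long gap = (long)index - (long)m->last_readpage - 1;

        if (m->consecutive_pages == 0)
                return 0;
        /* same two conditions as index_in_stride_window() */
        return (long)(m->stride_length - m->stride_pages) == gap &&
               m->consecutive_pages == m->stride_pages;
}

int main(void)
{
        /* Pattern: read 4 pages, skip 12, read 4, skip 12, ...
         * so stride_pages = 4 and stride_length = 16. */
        struct stride_model m = {
                .last_readpage     = 3,  /* pages 0-3 were just read */
                .consecutive_pages = 4,
                .stride_pages      = 4,
                .stride_length     = 16,
        };

        printf("page 16 in stride: %d\n", index_matches_stride(16, &m)); /* 1 */
        printf("page 8  in stride: %d\n", index_matches_stride(8, &m));  /* 0 */
        return 0;
}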
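stride_page_count() above delegates to stride_pg_count(), which is defined elsewhere in rw.c and not shown in this excerpt. As a rough model (an assumption, not the actual Lustre helper, which also accounts for the window and stride offsets), the number of data pages a window of `len` file pages can cover under a "stride_pages out of every stride_length" pattern is approximately (len / stride_length) * stride_pages + min(len % stride_length, stride_pages). This is why ras_stride_increase_window() caps the window with stride_page_count(ras, window_len) <= ra->ra_max_pages: the limit applies to the pages that would actually be read, not to the raw window span.

/* Rough model of the stride page count (illustrative approximation only,
 * not the stride_pg_count() helper from rw.c). */
#include <stdio.h>

static unsigned long model_stride_pg_count(unsigned long stride_length,
                                           unsigned long stride_pages,
                                           unsigned long len)
{
        unsigned long whole = len / stride_length; /* full strides covered */
        unsigned long tail  = len % stride_length; /* partial stride at end */

        if (tail > stride_pages)
                tail = stride_pages;               /* only data pages count */
        return whole * stride_pages + tail;
}

int main(void)
{
        /* 4 data pages out of every 16: a 64-page window covers 16 data
         * pages, a 72-page window covers 20. */
        printf("%lu\n", model_stride_pg_count(16, 4, 64)); /* 16 */
        printf("%lu\n", model_stride_pg_count(16, 4, 72)); /* 20 */
        return 0;
}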