rw.c
        ras->ras_requests, ras->ras_request_index,                      \
        ras->ras_consecutive_stride_requests, ras->ras_stride_offset,   \
        ras->ras_stride_pages, ras->ras_stride_length)

static int index_in_window(unsigned long index, unsigned long point,
                           unsigned long before, unsigned long after)
{
        unsigned long start = point - before, end = point + after;

        if (start > point)
                start = 0;
        if (end < point)
                end = ~0;

        return start <= index && index <= end;
}
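/*
 * Illustrative sketch, not part of the original file: index_in_window()
 * clamps instead of wrapping when the window crosses either end of the
 * unsigned long range. With point = 2, before = 5, after = 3, the
 * subtraction point - before wraps, so start is clamped to 0 and the
 * effective window becomes [0, 5]:
 */
#if 0   /* example only, never compiled */
        LASSERT(index_in_window(0, 2, 5, 3) == 1); /* start clamped to 0 */
        LASSERT(index_in_window(5, 2, 5, 3) == 1); /* point + after      */
        LASSERT(index_in_window(6, 2, 5, 3) == 0); /* past the window    */
#endif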
static struct ll_readahead_state *ll_ras_get(struct file *f)
{
        struct ll_file_data *fd;

        fd = LUSTRE_FPRIVATE(f);
        return &fd->fd_ras;
}

void ll_ra_read_in(struct file *f, struct ll_ra_read *rar)
{
        struct ll_readahead_state *ras;

        ras = ll_ras_get(f);

        spin_lock(&ras->ras_lock);
        ras->ras_requests++;
        ras->ras_request_index = 0;
        ras->ras_consecutive_requests++;
        rar->lrr_reader = current;

        list_add(&rar->lrr_linkage, &ras->ras_read_beads);
        spin_unlock(&ras->ras_lock);
}

void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar)
{
        struct ll_readahead_state *ras;

        ras = ll_ras_get(f);

        spin_lock(&ras->ras_lock);
        list_del_init(&rar->lrr_linkage);
        spin_unlock(&ras->ras_lock);
}

static struct ll_ra_read *ll_ra_read_get_locked(struct ll_readahead_state *ras)
{
        struct ll_ra_read *scan;

        list_for_each_entry(scan, &ras->ras_read_beads, lrr_linkage) {
                if (scan->lrr_reader == current)
                        return scan;
        }
        return NULL;
}

struct ll_ra_read *ll_ra_read_get(struct file *f)
{
        struct ll_readahead_state *ras;
        struct ll_ra_read *bead;

        ras = ll_ras_get(f);

        spin_lock(&ras->ras_lock);
        bead = ll_ra_read_get_locked(ras);
        spin_unlock(&ras->ras_lock);
        return bead;
}

static int ll_read_ahead_page(struct obd_export *exp, struct obd_io_group *oig,
                              int index, struct address_space *mapping)
{
        struct ll_async_page *llap;
        struct page *page;
        unsigned int gfp_mask = 0;
        int rc = 0;

        gfp_mask = GFP_HIGHUSER & ~__GFP_WAIT;
#ifdef __GFP_NOWARN
        gfp_mask |= __GFP_NOWARN;
#endif
        page = grab_cache_page_nowait_gfp(mapping, index, gfp_mask);
        if (page == NULL) {
                ll_ra_stats_inc(mapping, RA_STAT_FAILED_GRAB_PAGE);
                CDEBUG(D_READA, "g_c_p_n failed\n");
                return 0;
        }

        /* Check if page was truncated or reclaimed */
        if (page->mapping != mapping) {
                ll_ra_stats_inc(mapping, RA_STAT_WRONG_GRAB_PAGE);
                CDEBUG(D_READA, "g_c_p_n returned invalid page\n");
                GOTO(unlock_page, rc = 0);
        }

        /* we do this first so that we can see the page in the /proc
         * accounting */
        llap = llap_from_page(page, LLAP_ORIGIN_READAHEAD);
        if (IS_ERR(llap) || llap->llap_defer_uptodate) {
                if (PTR_ERR(llap) == -ENOLCK) {
                        ll_ra_stats_inc(mapping, RA_STAT_FAILED_MATCH);
                        CDEBUG(D_READA | D_PAGE,
                               "Adding page to cache failed index %d\n",
                               index);
                        CDEBUG(D_READA, "nolock page\n");
                        GOTO(unlock_page, rc = -ENOLCK);
                }
                CDEBUG(D_READA, "read-ahead page\n");
                GOTO(unlock_page, rc = 0);
        }

        /* skip completed pages */
        if (Page_Uptodate(page))
                GOTO(unlock_page, rc = 0);

        /* bail out when we hit the end of the lock. */
        rc = ll_issue_page_read(exp, llap, oig, 1);
        if (rc == 0) {
                LL_CDEBUG_PAGE(D_READA | D_PAGE, page, "started read-ahead\n");
                rc = 1;
        } else {
unlock_page:
                unlock_page(page);
                LL_CDEBUG_PAGE(D_READA | D_PAGE, page, "skipping read-ahead\n");
        }
        page_cache_release(page);
        return rc;
}

/* ra_io_arg will be filled in at the beginning of ll_readahead with
 * ras_lock held, then the following ll_read_ahead_pages will read RA
 * pages according to this arg; all the items in this structure are
 * counted by page index. */
struct ra_io_arg {
        unsigned long ria_start;  /* start offset of read-ahead */
        unsigned long ria_end;    /* end offset of read-ahead */
        /* If a stride read pattern is detected, ria_stoff means where
         * the stride read starts. Note: for normal read-ahead the
         * value here is meaningless and will not be accessed. */
        pgoff_t ria_stoff;
        /* ria_length and ria_pages are the stride length and the number
         * of pages read per stride in stride I/O mode. They are also
         * used to check whether this is a stride I/O read-ahead when
         * issuing the read-ahead pages. */
        unsigned long ria_length;
        unsigned long ria_pages;
};

#define RIA_DEBUG(ria)                                                        \
        CDEBUG(D_READA, "rs %lu re %lu ro %lu rl %lu rp %lu\n",               \
               ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length, \
               ria->ria_pages)

#define RAS_INCREASE_STEP (1024 * 1024 >> CFS_PAGE_SHIFT)

static inline int stride_io_mode(struct ll_readahead_state *ras)
{
        return ras->ras_consecutive_stride_requests > 1;
}

/* The function calculates how many pages will be read in
 * [off, off + length], which will be read by stride I/O mode,
 * stride_offset = st_off, stride_length = st_len,
 * stride_pages = st_pgs */
static unsigned long
stride_pg_count(pgoff_t st_off, unsigned long st_len, unsigned long st_pgs,
                unsigned long off, unsigned length)
{
        unsigned long cont_len = st_off > off ? st_off - off : 0;
        unsigned long stride_len = length + off > st_off ?
                                   length + off + 1 - st_off : 0;
        unsigned long left, pg_count;

        if (st_len == 0 || length == 0)
                return length;

        left = do_div(stride_len, st_len);
        left = min(left, st_pgs);

        pg_count = left + stride_len * st_pgs + cont_len;

        LASSERT(pg_count >= left);

        CDEBUG(D_READA, "st_off %lu, st_len %lu st_pgs %lu off %lu length %u "
               "pgcount %lu\n", st_off, st_len, st_pgs, off, length, pg_count);

        return pg_count;
}
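/*
 * Illustrative sketch, not part of the original file: a worked
 * stride_pg_count() case. With st_off = 0, st_len = 16 and st_pgs = 4
 * (4 pages read out of every 16-page stride) over off = 0, length = 40:
 *
 *   cont_len   = 0                      (window starts at the stride)
 *   stride_len = 40 + 0 + 1 - 0 = 41    -> 2 full strides, remainder 9
 *   left       = min(9, 4) = 4          (pages in the partial stride)
 *   pg_count   = 4 + 2 * 4 + 0 = 12
 *
 * which matches the pages actually covered: 0-3, 16-19 and 32-35.
 */
#if 0   /* example only, never compiled */
        LASSERT(stride_pg_count(0, 16, 4, 0, 40) == 12);
#endif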
static int ria_page_count(struct ra_io_arg *ria)
{
        __u64 length = ria->ria_end >= ria->ria_start ?
                       ria->ria_end - ria->ria_start + 1 : 0;

        return stride_pg_count(ria->ria_stoff, ria->ria_length,
                               ria->ria_pages, ria->ria_start,
                               length);
}

/* Check whether the index is inside the defined read-ahead window */
static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria)
{
        /* If ria_length == ria_pages, it means non-stride I/O mode and
         * idx should always be inside the read-ahead window in this case.
         * For stride I/O mode, just check whether the idx is inside
         * the ria_pages. */
        return ria->ria_length == 0 || ria->ria_length == ria->ria_pages ||
               (idx - ria->ria_stoff) % ria->ria_length < ria->ria_pages;
}
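/*
 * Illustrative sketch, not part of the original file: with a stride of
 * ria_length = 16 pages, of which ria_pages = 4 are read, starting at
 * ria_stoff = 0, pages 0-3 of every 16-page stride are inside the
 * window and pages 4-15 fall in the gap:
 */
#if 0   /* example only, never compiled */
        struct ra_io_arg ria = { .ria_stoff  = 0,
                                 .ria_length = 16,
                                 .ria_pages  = 4 };

        LASSERT(ras_inside_ra_window(3,  &ria) == 1); /* inside a chunk */
        LASSERT(ras_inside_ra_window(4,  &ria) == 0); /* first gap page */
        LASSERT(ras_inside_ra_window(16, &ria) == 1); /* next stride    */
#endif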
static int ll_read_ahead_pages(struct obd_export *exp,
                               struct obd_io_group *oig,
                               struct ra_io_arg *ria,
                               unsigned long *reserved_pages,
                               struct address_space *mapping,
                               unsigned long *ra_end)
{
        int rc, count = 0, stride_ria;
        unsigned long page_idx;

        LASSERT(ria != NULL);
        RIA_DEBUG(ria);

        stride_ria = ria->ria_length > ria->ria_pages && ria->ria_pages > 0;
        for (page_idx = ria->ria_start; page_idx <= ria->ria_end &&
             *reserved_pages > 0; page_idx++) {
                if (ras_inside_ra_window(page_idx, ria)) {
                        /* If the page is inside the read-ahead window */
                        rc = ll_read_ahead_page(exp, oig, page_idx, mapping);
                        if (rc == 1) {
                                (*reserved_pages)--;
                                count++;
                        } else if (rc == -ENOLCK)
                                break;
                } else if (stride_ria) {
                        /* If it is not in the read-ahead window, and this is
                         * stride read-ahead mode, then check whether it
                         * should skip the stride gap */
                        pgoff_t offset;
                        /* FIXME: This assertion is only valid for forward
                         * read-ahead; it will be fixed when backward
                         * read-ahead is implemented */
                        LASSERTF(page_idx > ria->ria_stoff, "since %lu is in "
                                 "the gap of the ra window, it should be "
                                 "bigger than stride offset %lu\n",
                                 page_idx, ria->ria_stoff);

                        offset = page_idx - ria->ria_stoff;
                        offset = offset % (ria->ria_length);
                        if (offset > ria->ria_pages) {
                                page_idx += ria->ria_length - offset;
                                CDEBUG(D_READA, "i %lu skip %lu\n", page_idx,
                                       ria->ria_length - offset);
                                continue;
                        }
                }
        }
        *ra_end = page_idx;
        return count;
}

static int ll_readahead(struct ll_readahead_state *ras, struct obd_export *exp,
                        struct address_space *mapping, struct obd_io_group *oig,
                        int flags)
{
        unsigned long start = 0, end = 0, reserved;
        unsigned long ra_end, len;
        struct inode *inode;
        struct lov_stripe_md *lsm;
        struct ll_ra_read *bead;
        struct ost_lvb lvb;
        struct ra_io_arg ria = { 0 };
        int ret = 0;
        __u64 kms;
        ENTRY;

        inode = mapping->host;
        lsm = ll_i2info(inode)->lli_smd;

        lov_stripe_lock(lsm);
        inode_init_lvb(inode, &lvb);
        obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
        kms = lvb.lvb_size;
        lov_stripe_unlock(lsm);
        if (kms == 0) {
                ll_ra_stats_inc(mapping, RA_STAT_ZERO_LEN);
                RETURN(0);
        }

        spin_lock(&ras->ras_lock);
        bead = ll_ra_read_get_locked(ras);
        /* Enlarge the RA window to encompass the full read */
        if (bead != NULL && ras->ras_window_start + ras->ras_window_len <
            bead->lrr_start + bead->lrr_count) {
                ras->ras_window_len = bead->lrr_start + bead->lrr_count -
                                      ras->ras_window_start;
        }
        /* Reserve a part of the read-ahead window that we'll be issuing */
        if (ras->ras_window_len) {
                start = ras->ras_next_readahead;
                end = ras->ras_window_start + ras->ras_window_len - 1;
        }
        if (end != 0) {
                /* Truncate RA window to end of file */
                end = min(end, (unsigned long)((kms - 1) >> CFS_PAGE_SHIFT));
                ras->ras_next_readahead = max(end, end + 1);
                RAS_CDEBUG(ras);
        }
        ria.ria_start = start;
        ria.ria_end = end;
        /* If stride I/O mode is detected, get the stride window */
        if (stride_io_mode(ras)) {
                ria.ria_length = ras->ras_stride_length;
                ria.ria_pages = ras->ras_stride_pages;
        }
        spin_unlock(&ras->ras_lock);

        if (end == 0) {
                ll_ra_stats_inc(mapping, RA_STAT_ZERO_WINDOW);
                RETURN(0);
        }
        len = ria_page_count(&ria);
        if (len == 0)
                RETURN(0);

        reserved = ll_ra_count_get(ll_i2sbi(inode), len);
        if (reserved < end - start + 1)
                ll_ra_stats_inc(mapping, RA_STAT_MAX_IN_FLIGHT);

        CDEBUG(D_READA, "reserved page %lu\n", reserved);

        ret = ll_read_ahead_pages(exp, oig, &ria, &reserved, mapping, &ra_end);

        LASSERTF(reserved >= 0, "reserved %lu\n", reserved);
        if (reserved != 0)
                ll_ra_count_put(ll_i2sbi(inode), reserved);

        if (ra_end == end + 1 && ra_end == (kms >> CFS_PAGE_SHIFT))
                ll_ra_stats_inc(mapping, RA_STAT_EOF);

        /* if we didn't get to the end of the region we reserved from
         * the ras we need to go back and update the ras so that the
         * next read-ahead tries from where we left off. we only do so
         * if the region we failed to issue read-ahead on is still ahead
         * of the app and behind the next index to start read-ahead from */
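/*
 * The listing is cut off here; the original continuation of ll_readahead()
 * is not shown. A minimal sketch of the rewind the comment above describes,
 * assuming it runs right after the comment with the same ras, ra_end and
 * end values in scope (an illustration, not the original code):
 */
#if 0   /* example only, never compiled */
        if (ra_end != end + 1) {        /* we stopped short of the goal */
                spin_lock(&ras->ras_lock);
                /* only rewind if ra_end is still ahead of the reader and
                 * inside the current read-ahead window */
                if (ra_end < ras->ras_next_readahead &&
                    index_in_window(ra_end, ras->ras_window_start, 0,
                                    ras->ras_window_len)) {
                        ras->ras_next_readahead = ra_end;
                        RAS_CDEBUG(ras);
                }
                spin_unlock(&ras->ras_lock);
        }
#endif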