📄 rw.c
 * truncate's punch on the storage targets.
 *
 * The kms serves these purposes as it is set at both truncate and extending
 * writes.
 */
static int ll_ap_refresh_count(void *data, int cmd)
{
        struct ll_inode_info *lli;
        struct ll_async_page *llap;
        struct lov_stripe_md *lsm;
        struct page *page;
        struct inode *inode;
        struct ost_lvb lvb;
        __u64 kms;
        ENTRY;

        /* readpage queues with _COUNT_STABLE, shouldn't get here. */
        LASSERT(cmd != OBD_BRW_READ);

        llap = LLAP_FROM_COOKIE(data);
        page = llap->llap_page;
        inode = page->mapping->host;
        lli = ll_i2info(inode);
        lsm = lli->lli_smd;

        lov_stripe_lock(lsm);
        inode_init_lvb(inode, &lvb);
        obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
        kms = lvb.lvb_size;
        lov_stripe_unlock(lsm);

        /* catch race with truncate */
        if (((__u64)page->index << CFS_PAGE_SHIFT) >= kms)
                return 0;

        /* catch sub-page write at end of file */
        if (((__u64)page->index << CFS_PAGE_SHIFT) + CFS_PAGE_SIZE > kms)
                return kms % CFS_PAGE_SIZE;

        return CFS_PAGE_SIZE;
}
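/*
 * Illustrative sketch, not part of the original rw.c: the same
 * kms-vs-page-offset arithmetic as ll_ap_refresh_count() above, written
 * with plain parameters so it can be checked in isolation.  The helper
 * name and the page_bytes parameter (standing in for CFS_PAGE_SIZE) are
 * invented for illustration; kept under #if 0 so the file still builds.
 */
#if 0
static unsigned int refresh_count_sketch(unsigned long index, __u64 kms,
                                         unsigned int page_bytes)
{
        __u64 offset = (__u64)index * page_bytes;

        if (offset >= kms)              /* page starts at or past kms:
                                         * racing truncate, send nothing */
                return 0;
        if (offset + page_bytes > kms)  /* kms falls inside this page:
                                         * send only the valid prefix */
                return kms % page_bytes;
        return page_bytes;              /* whole page is below kms */
}
#endif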
void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa)
{
        struct lov_stripe_md *lsm;
        obd_flag valid_flags;

        lsm = ll_i2info(inode)->lli_smd;

        oa->o_id = lsm->lsm_object_id;
        oa->o_valid = OBD_MD_FLID;
        valid_flags = OBD_MD_FLTYPE | OBD_MD_FLATIME;
        if (cmd & OBD_BRW_WRITE) {
                oa->o_valid |= OBD_MD_FLEPOCH;
                oa->o_easize = ll_i2info(inode)->lli_io_epoch;

                valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
                               OBD_MD_FLUID | OBD_MD_FLGID |
                               OBD_MD_FLFID | OBD_MD_FLGENER;
        }

        obdo_from_inode(oa, inode, valid_flags);
}

static void ll_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
{
        struct ll_async_page *llap;
        ENTRY;

        llap = LLAP_FROM_COOKIE(data);
        ll_inode_fill_obdo(llap->llap_page->mapping->host, cmd, oa);

        EXIT;
}

static void ll_ap_update_obdo(void *data, int cmd, struct obdo *oa,
                              obd_valid valid)
{
        struct ll_async_page *llap;
        ENTRY;

        llap = LLAP_FROM_COOKIE(data);
        obdo_from_inode(oa, llap->llap_page->mapping->host, valid);

        EXIT;
}

static struct obd_async_page_ops ll_async_page_ops = {
        .ap_make_ready =    ll_ap_make_ready,
        .ap_refresh_count = ll_ap_refresh_count,
        .ap_fill_obdo =     ll_ap_fill_obdo,
        .ap_update_obdo =   ll_ap_update_obdo,
        .ap_completion =    ll_ap_completion,
};

struct ll_async_page *llap_cast_private(struct page *page)
{
        struct ll_async_page *llap = (struct ll_async_page *)page_private(page);

        LASSERTF(llap == NULL || llap->llap_magic == LLAP_MAGIC,
                 "page %p private %lu gave magic %d which != %d\n",
                 page, page_private(page), llap->llap_magic, LLAP_MAGIC);

        return llap;
}

/* Try to shrink the page cache for the @sbi filesystem by 1/@shrink_fraction.
 *
 * There is an llap attached onto every page in lustre, linked off @sbi.
 * We add an llap to the list so we don't lose our place during list walking.
 * If llaps in the list are being moved they will only move to the end
 * of the LRU, and we aren't terribly interested in those pages here (we
 * start at the beginning of the list where the least-used llaps are).
 */
int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction)
{
        struct ll_async_page *llap, dummy_llap = { .llap_magic = 0xd11ad11a };
        unsigned long total, want, count = 0;

        total = sbi->ll_async_page_count;

        /* There can be a large number of llaps (600k or more in a large
         * memory machine) so the VM 1/6 shrink ratio is likely too much.
         * Since we are freeing pages also, we don't necessarily want to
         * shrink so much.  Limit to 40MB of pages + llaps per call. */
        if (shrink_fraction == 0)
                want = sbi->ll_async_page_count - sbi->ll_async_page_max + 32;
        else
                want = (total + shrink_fraction - 1) / shrink_fraction;

        if (want > 40 << (20 - CFS_PAGE_SHIFT))
                want = 40 << (20 - CFS_PAGE_SHIFT);

        CDEBUG(D_CACHE, "shrinking %lu of %lu pages (1/%d)\n",
               want, total, shrink_fraction);

        spin_lock(&sbi->ll_lock);
        list_add(&dummy_llap.llap_pglist_item, &sbi->ll_pglist);

        /* note: total is unsigned, so --total >= 0 is always true; the
         * walk actually terminates on llap == NULL or count >= want */
        while (--total >= 0 && count < want) {
                struct page *page;
                int keep;

                if (unlikely(need_resched())) {
                        spin_unlock(&sbi->ll_lock);
                        cond_resched();
                        spin_lock(&sbi->ll_lock);
                }

                llap = llite_pglist_next_llap(sbi,
                                              &dummy_llap.llap_pglist_item);
                list_del_init(&dummy_llap.llap_pglist_item);
                if (llap == NULL)
                        break;

                page = llap->llap_page;
                LASSERT(page != NULL);

                list_add(&dummy_llap.llap_pglist_item,
                         &llap->llap_pglist_item);

                /* Page needs/undergoing IO */
                if (TryLockPage(page)) {
                        LL_CDEBUG_PAGE(D_PAGE, page, "can't lock\n");
                        continue;
                }

                keep = (llap->llap_write_queued || PageDirty(page) ||
                        PageWriteback(page) ||
                        (!PageUptodate(page) &&
                         llap->llap_origin != LLAP_ORIGIN_READAHEAD));

                LL_CDEBUG_PAGE(D_PAGE, page,
                               "%s LRU page: %s%s%s%s%s origin %s\n",
                               keep ? "keep" : "drop",
                               llap->llap_write_queued ? "wq " : "",
                               PageDirty(page) ? "pd " : "",
                               PageUptodate(page) ? "" : "!pu ",
                               PageWriteback(page) ? "wb" : "",
                               llap->llap_defer_uptodate ? "" : "!du",
                               llap_origins[llap->llap_origin]);

                /* If page is dirty or undergoing IO don't discard it */
                if (keep) {
                        unlock_page(page);
                        continue;
                }

                page_cache_get(page);
                spin_unlock(&sbi->ll_lock);

                if (page->mapping != NULL) {
                        ll_teardown_mmaps(page->mapping,
                                          (__u64)page->index << CFS_PAGE_SHIFT,
                                          ((__u64)page->index << CFS_PAGE_SHIFT)|
                                          ~CFS_PAGE_MASK);
                        if (!PageDirty(page) && !page_mapped(page)) {
                                ll_ra_accounting(llap, page->mapping);
                                ll_truncate_complete_page(page);
                                ++count;
                        } else {
                                LL_CDEBUG_PAGE(D_PAGE, page,
                                               "Not dropping page because it "
                                               "is %s\n",
                                               PageDirty(page) ?
                                               "dirty" : "mapped");
                        }
                }
                unlock_page(page);
                page_cache_release(page);

                spin_lock(&sbi->ll_lock);
        }
        list_del(&dummy_llap.llap_pglist_item);
        spin_unlock(&sbi->ll_lock);

        CDEBUG(D_CACHE, "shrank %lu/%lu and left %lu unscanned\n",
               count, want, total);

        return count;
}
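/*
 * Illustrative sketch, not part of the original rw.c: the dummy-entry
 * trick llap_shrink_cache() uses to walk an LRU that other threads may
 * reorder.  A marker llap stays in the list so the walk can drop
 * ll_lock (to reschedule or tear down pages) and still resume from the
 * same position.  The function name is invented; kept under #if 0.
 */
#if 0
static void llap_lru_walk_sketch(struct ll_sb_info *sbi)
{
        struct ll_async_page *llap, marker = { .llap_magic = 0xd11ad11a };

        spin_lock(&sbi->ll_lock);
        list_add(&marker.llap_pglist_item, &sbi->ll_pglist);
        while ((llap = llite_pglist_next_llap(sbi,
                                &marker.llap_pglist_item)) != NULL) {
                /* re-anchor the marker just past the current entry so
                 * dropping ll_lock below cannot lose our position */
                list_del_init(&marker.llap_pglist_item);
                list_add(&marker.llap_pglist_item, &llap->llap_pglist_item);

                /* ... inspect llap here, possibly dropping and retaking
                 * ll_lock around any blocking work ... */
        }
        list_del(&marker.llap_pglist_item);
        spin_unlock(&sbi->ll_lock);
}
#endif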
static struct ll_async_page *llap_from_page_with_lockh(struct page *page,
                                                       unsigned origin,
                                                       struct lustre_handle *lockh)
{
        struct ll_async_page *llap;
        struct obd_export *exp;
        struct inode *inode = page->mapping->host;
        struct ll_sb_info *sbi;
        int rc;
        ENTRY;

        if (!inode) {
                static int triggered;

                if (!triggered) {
                        LL_CDEBUG_PAGE(D_ERROR, page, "Bug 10047. Wrong anon "
                                       "page received\n");
                        libcfs_debug_dumpstack(NULL);
                        triggered = 1;
                }
                RETURN(ERR_PTR(-EINVAL));
        }
        sbi = ll_i2sbi(inode);
        LASSERT(ll_async_page_slab);
        LASSERTF(origin < LLAP__ORIGIN_MAX, "%u\n", origin);

        llap = llap_cast_private(page);
        if (llap != NULL) {
                /* move to end of LRU list, except when page is just about to
                 * die */
                if (origin != LLAP_ORIGIN_REMOVEPAGE) {
                        spin_lock(&sbi->ll_lock);
                        sbi->ll_pglist_gen++;
                        list_del_init(&llap->llap_pglist_item);
                        list_add_tail(&llap->llap_pglist_item,
                                      &sbi->ll_pglist);
                        spin_unlock(&sbi->ll_lock);
                }
                GOTO(out, llap);
        }

        exp = ll_i2obdexp(page->mapping->host);
        if (exp == NULL)
                RETURN(ERR_PTR(-EINVAL));

        /* limit the number of lustre-cached pages */
        if (sbi->ll_async_page_count >= sbi->ll_async_page_max)
                llap_shrink_cache(sbi, 0);

        OBD_SLAB_ALLOC(llap, ll_async_page_slab, CFS_ALLOC_STD,
                       ll_async_page_slab_size);
        if (llap == NULL)
                RETURN(ERR_PTR(-ENOMEM));
        llap->llap_magic = LLAP_MAGIC;
        llap->llap_cookie = (void *)llap + size_round(sizeof(*llap));

        /* XXX: for bug 11270 - check for lockless origin here! */
        if (origin == LLAP_ORIGIN_LOCKLESS_IO)
                llap->llap_nocache = 1;

        rc = obd_prep_async_page(exp, ll_i2info(inode)->lli_smd, NULL, page,
                                 (obd_off)page->index << CFS_PAGE_SHIFT,
                                 &ll_async_page_ops, llap, &llap->llap_cookie,
                                 llap->llap_nocache, lockh);
        if (rc) {
                OBD_SLAB_FREE(llap, ll_async_page_slab,
                              ll_async_page_slab_size);
                RETURN(ERR_PTR(rc));
        }

        CDEBUG(D_CACHE, "llap %p page %p cookie %p obj off "LPU64"\n", llap,
               page, llap->llap_cookie, (obd_off)page->index << CFS_PAGE_SHIFT);
        /* also zeroing the PRIVBITS low order bitflags */
        __set_page_ll_data(page, llap);
        llap->llap_page = page;

        spin_lock(&sbi->ll_lock);
        sbi->ll_pglist_gen++;
        sbi->ll_async_page_count++;
        list_add_tail(&llap->llap_pglist_item, &sbi->ll_pglist);
        spin_unlock(&sbi->ll_lock);

 out:
        if (unlikely(sbi->ll_flags & LL_SBI_LLITE_CHECKSUM)) {
                __u32 csum;
                char *kaddr = kmap_atomic(page, KM_USER0);

                csum = init_checksum(OSC_DEFAULT_CKSUM);
                csum = compute_checksum(csum, kaddr, CFS_PAGE_SIZE,
                                        OSC_DEFAULT_CKSUM);
                kunmap_atomic(kaddr, KM_USER0);
                if (origin == LLAP_ORIGIN_READAHEAD ||
                    origin == LLAP_ORIGIN_READPAGE ||
                    origin == LLAP_ORIGIN_LOCKLESS_IO) {
                        llap->llap_checksum = 0;
                } else if (origin == LLAP_ORIGIN_COMMIT_WRITE ||
                           llap->llap_checksum == 0) {
                        llap->llap_checksum = csum;
                        CDEBUG(D_PAGE, "page %p cksum %x\n", page, csum);
                } else if (llap->llap_checksum == csum) {
                        /* origin == LLAP_ORIGIN_WRITEPAGE */
                        CDEBUG(D_PAGE, "page %p cksum %x confirmed\n",
                               page, csum);
                } else {
                        /* origin == LLAP_ORIGIN_WRITEPAGE */
                        LL_CDEBUG_PAGE(D_ERROR, page, "old cksum %x != new "
                                       "%x!\n", llap->llap_checksum, csum);
                }
        }

        llap->llap_origin = origin;
        RETURN(llap);
}

static inline struct ll_async_page *llap_from_page(struct page *page,
                                                   unsigned origin)
{
        return llap_from_page_with_lockh(page, origin, NULL);
}
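/*
 * Illustrative sketch, not part of the original rw.c: the per-origin
 * checksum policy from the LL_SBI_LLITE_CHECKSUM block in
 * llap_from_page_with_lockh() above, factored into one invented helper
 * that returns the value to store in llap_checksum.  Kept under #if 0.
 */
#if 0
static __u32 llap_cksum_policy(unsigned origin, __u32 stored, __u32 csum)
{
        /* reads fill the page later, so there is nothing to verify yet */
        if (origin == LLAP_ORIGIN_READAHEAD ||
            origin == LLAP_ORIGIN_READPAGE ||
            origin == LLAP_ORIGIN_LOCKLESS_IO)
                return 0;

        /* a fresh write (or no previous checksum): remember this one */
        if (origin == LLAP_ORIGIN_COMMIT_WRITE || stored == 0)
                return csum;

        /* writepage: the page should be unchanged since commit_write */
        if (stored != csum)
                CERROR("old cksum %x != new %x!\n", stored, csum);
        return stored;
}
#endif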
static int queue_or_sync_write(struct obd_export *exp, struct inode *inode,
                               struct ll_async_page *llap, unsigned to,
                               obd_flag async_flags)
{
        unsigned long size_index = i_size_read(inode) >> CFS_PAGE_SHIFT;
        struct obd_io_group *oig;
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        int rc, noquot = llap->llap_ignore_quota ? OBD_BRW_NOQUOTA : 0;
        ENTRY;

        /* _make_ready only sees llap once we've unlocked the page */
        llap->llap_write_queued = 1;
        rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL,
                                llap->llap_cookie, OBD_BRW_WRITE | noquot,
                                0, 0, 0, async_flags);
        if (rc == 0) {
                LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "write queued\n");