filter_io.c
来自「lustre 1.6.5 source code」· C语言 代码 · 共 863 行 · 第 1/3 页
C
863 行
spin_unlock(&obd->obd_osfs_lock); } iobuf = filter_iobuf_get(&obd->u.filter, oti); if (IS_ERR(iobuf)) RETURN(PTR_ERR(iobuf)); push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); dentry = filter_oa2dentry(obd, oa); if (IS_ERR(dentry)) { rc = PTR_ERR(dentry); dentry = NULL; GOTO(cleanup, rc); } inode = dentry->d_inode; obdo_to_inode(inode, oa, OBD_MD_FLATIME); fsfilt_check_slow(obd, now, "preprw_read setup"); for (i = 0, lnb = res, rnb = nb; i < obj->ioo_bufcnt; i++, rnb++, lnb++) { lnb->dentry = dentry; lnb->offset = rnb->offset; lnb->len = rnb->len; lnb->flags = rnb->flags; /* * ost_brw_write()->ost_nio_pages_get() already initialized * lnb->page to point to the page from the per-thread page * pool (bug 5137), initialize page. */ LASSERT(lnb->page != NULL); if (i_size_read(inode) <= rnb->offset) /* If there's no more data, abort early. lnb->rc == 0, * so it's easy to detect later. */ break; else filter_alloc_dio_page(obd, inode, lnb); if (i_size_read(inode) < lnb->offset + lnb->len - 1) lnb->rc = i_size_read(inode) - lnb->offset; else lnb->rc = lnb->len; tot_bytes += lnb->rc; filter_iobuf_add_page(obd, iobuf, inode, lnb->page); } fsfilt_check_slow(obd, now, "start_page_read"); rc = filter_direct_io(OBD_BRW_READ, dentry, iobuf, exp, NULL, NULL, NULL); if (rc) GOTO(cleanup, rc); lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_READ_BYTES, tot_bytes); if (exp->exp_nid_stats && exp->exp_nid_stats->nid_stats) lprocfs_counter_add(exp->exp_nid_stats->nid_stats, LPROC_FILTER_READ_BYTES, tot_bytes); EXIT; cleanup: if (rc != 0) { filter_free_dio_pages(objcount, obj, niocount, res); if (dentry != NULL) f_dput(dentry); } filter_iobuf_put(&obd->u.filter, iobuf, oti); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); if (rc) CERROR("io error %d\n", rc); return rc;}/* When clients have dirtied as much space as they've been granted they * fall through to sync writes. These sync writes haven't been expressed * in grants and need to error with ENOSPC when there isn't room in the * filesystem for them after grants are taken into account. However, * writeback of the dirty data that was already granted space can write * right on through. * * Caller must hold obd_osfs_lock. */static int filter_grant_check(struct obd_export *exp, struct obdo *oa, int objcount, struct fsfilt_objinfo *fso, int niocount, struct niobuf_remote *rnb, struct niobuf_local *lnb, obd_size *left, struct inode *inode){ struct filter_export_data *fed = &exp->exp_filter_data; int blocksize = exp->exp_obd->u.obt.obt_sb->s_blocksize; unsigned long used = 0, ungranted = 0, using; int i, rc = -ENOSPC, obj, n = 0; LASSERT_SPIN_LOCKED(&exp->exp_obd->obd_osfs_lock); for (obj = 0; obj < objcount; obj++) { for (i = 0; i < fso[obj].fso_bufcnt; i++, n++) { int tmp, bytes; /* should match the code in osc_exit_cache */ bytes = rnb[n].len; bytes += rnb[n].offset & (blocksize - 1); tmp = (rnb[n].offset + rnb[n].len) & (blocksize - 1); if (tmp) bytes += blocksize - tmp; if ((rnb[n].flags & OBD_BRW_FROM_GRANT) && (oa->o_valid & OBD_MD_FLGRANT)) { if (fed->fed_grant < used + bytes) { CDEBUG(D_CACHE, "%s: cli %s/%p claims %ld+%d " "GRANT, real grant %lu idx %d\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, used, bytes, fed->fed_grant, n); } else { used += bytes; rnb[n].flags |= OBD_BRW_GRANTED; lnb[n].lnb_grant_used = bytes; CDEBUG(0, "idx %d used=%lu\n", n, used); rc = 0; continue; } } if (*left > ungranted + bytes) { /* if enough space, pretend it was granted */ ungranted += bytes; rnb[n].flags |= OBD_BRW_GRANTED; lnb[n].lnb_grant_used = bytes; CDEBUG(0, "idx %d ungranted=%lu\n",n,ungranted); rc = 0; continue; } /* We can't check for already-mapped blocks here, as * it requires dropping the osfs lock to do the bmap. * Instead, we return ENOSPC and in that case we need * to go through and verify if all of the blocks not * marked BRW_GRANTED are already mapped and we can * ignore this error. */ lnb[n].rc = -ENOSPC; rnb[n].flags &= ~OBD_BRW_GRANTED; CDEBUG(D_CACHE,"%s: cli %s/%p idx %d no space for %d\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, n, bytes); } } /* Now substract what client have used already. We don't subtract * this from the tot_granted yet, so that other client's can't grab * that space before we have actually allocated our blocks. That * happens in filter_grant_commit() after the writes are done. */ *left -= ungranted; fed->fed_grant -= used; fed->fed_pending += used + ungranted; exp->exp_obd->u.filter.fo_tot_granted += ungranted; exp->exp_obd->u.filter.fo_tot_pending += used + ungranted; CDEBUG(D_CACHE, "%s: cli %s/%p used: %lu ungranted: %lu grant: %lu dirty: %lu\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, used, ungranted, fed->fed_grant, fed->fed_dirty); /* Rough calc in case we don't refresh cached statfs data */ using = (used + ungranted + 1 ) >> exp->exp_obd->u.obt.obt_sb->s_blocksize_bits; if (exp->exp_obd->obd_osfs.os_bavail > using) exp->exp_obd->obd_osfs.os_bavail -= using; else exp->exp_obd->obd_osfs.os_bavail = 0; if (fed->fed_dirty < used) { CERROR("%s: cli %s/%p claims used %lu > fed_dirty %lu\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, used, fed->fed_dirty); used = fed->fed_dirty; } exp->exp_obd->u.filter.fo_tot_dirty -= used; fed->fed_dirty -= used; if (fed->fed_dirty < 0 || fed->fed_grant < 0 || fed->fed_pending < 0) { CERROR("%s: cli %s/%p dirty %ld pend %ld grant %ld\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, fed->fed_dirty, fed->fed_pending, fed->fed_grant); spin_unlock(&exp->exp_obd->obd_osfs_lock); LBUG(); } return rc;}/* If we ever start to support multi-object BRW RPCs, we will need to get locks * on mulitple inodes. That isn't all, because there still exists the * possibility of a truncate starting a new transaction while holding the ext3 * rwsem = write while some writes (which have started their transactions here) * blocking on the ext3 rwsem = read => lock inversion. * * The handling gets very ugly when dealing with locked pages. It may be easier * to just get rid of the locked page code (which has problems of its own) and * either discover we do not need it anymore (i.e. it was a symptom of another * bug) or ensure we get the page locks in an appropriate order. */static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb, struct niobuf_local *res, struct obd_trans_info *oti){ struct lvfs_run_ctxt saved; struct niobuf_remote *rnb; struct niobuf_local *lnb = res; struct fsfilt_objinfo fso; struct filter_mod_data *fmd; struct dentry *dentry = NULL; void *iobuf; obd_size left; unsigned long now = jiffies; int rc = 0, i, tot_bytes = 0, cleanup_phase = 0; ENTRY; LASSERT(objcount == 1); LASSERT(obj->ioo_bufcnt > 0); push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); iobuf = filter_iobuf_get(&exp->exp_obd->u.filter, oti); if (IS_ERR(iobuf)) GOTO(cleanup, rc = PTR_ERR(iobuf)); cleanup_phase = 1; dentry = filter_fid2dentry(exp->exp_obd, NULL, obj->ioo_gr, obj->ioo_id); if (IS_ERR(dentry)) GOTO(cleanup, rc = PTR_ERR(dentry)); cleanup_phase = 2; if (dentry->d_inode == NULL) { CERROR("%s: trying to BRW to non-existent file "LPU64"\n", exp->exp_obd->obd_name, obj->ioo_id); GOTO(cleanup, rc = -ENOENT); } fso.fso_dentry = dentry; fso.fso_bufcnt = obj->ioo_bufcnt; fsfilt_check_slow(exp->exp_obd, now, "preprw_write setup"); /* Don't update inode timestamps if this write is older than a * setattr which modifies the timestamps. b=10150 */ /* XXX when we start having persistent reservations this needs to * be changed to filter_fmd_get() to create the fmd if it doesn't * already exist so we can store the reservation handle there. */ fmd = filter_fmd_find(exp, obj->ioo_id, obj->ioo_gr); LASSERT(oa != NULL); spin_lock(&exp->exp_obd->obd_osfs_lock); filter_grant_incoming(exp, oa); if (fmd && fmd->fmd_mactime_xid > oti->oti_xid) oa->o_valid &= ~(OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLATIME); else obdo_to_inode(dentry->d_inode, oa, OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME); cleanup_phase = 3; left = filter_grant_space_left(exp); rc = filter_grant_check(exp, oa, objcount, &fso, niocount, nb, res, &left, dentry->d_inode); /* do not zero out oa->o_valid as it is used in filter_commitrw_write() * for setting UID/GID and fid EA in first write time. */ if (oa->o_valid & OBD_MD_FLGRANT) oa->o_grant = filter_grant(exp,oa->o_grant,oa->o_undirty,left); spin_unlock(&exp->exp_obd->obd_osfs_lock); filter_fmd_put(exp, fmd); if (rc) GOTO(cleanup, rc); for (i = 0, rnb = nb, lnb = res; i < obj->ioo_bufcnt; i++, lnb++, rnb++) {
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?