filter_io.c

来自「lustre 1.6.5 source code」· C语言 代码 · 共 863 行 · 第 1/3 页

C
863
字号
                spin_unlock(&obd->obd_osfs_lock);        }        iobuf = filter_iobuf_get(&obd->u.filter, oti);        if (IS_ERR(iobuf))                RETURN(PTR_ERR(iobuf));        push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);        dentry = filter_oa2dentry(obd, oa);        if (IS_ERR(dentry)) {                rc = PTR_ERR(dentry);                dentry = NULL;                GOTO(cleanup, rc);        }        inode = dentry->d_inode;        obdo_to_inode(inode, oa, OBD_MD_FLATIME);        fsfilt_check_slow(obd, now, "preprw_read setup");        for (i = 0, lnb = res, rnb = nb; i < obj->ioo_bufcnt;             i++, rnb++, lnb++) {                lnb->dentry = dentry;                lnb->offset = rnb->offset;                lnb->len    = rnb->len;                lnb->flags  = rnb->flags;                /*                 * ost_brw_write()->ost_nio_pages_get() already initialized                 * lnb->page to point to the page from the per-thread page                 * pool (bug 5137), initialize page.                 */                LASSERT(lnb->page != NULL);                if (i_size_read(inode) <= rnb->offset)                        /* If there's no more data, abort early.  lnb->rc == 0,                         * so it's easy to detect later. 
*/                        break;                else                        filter_alloc_dio_page(obd, inode, lnb);                if (i_size_read(inode) < lnb->offset + lnb->len - 1)                        lnb->rc = i_size_read(inode) - lnb->offset;                else                        lnb->rc = lnb->len;                tot_bytes += lnb->rc;                filter_iobuf_add_page(obd, iobuf, inode, lnb->page);        }        fsfilt_check_slow(obd, now, "start_page_read");        rc = filter_direct_io(OBD_BRW_READ, dentry, iobuf,                              exp, NULL, NULL, NULL);        if (rc)                GOTO(cleanup, rc);        lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_READ_BYTES, tot_bytes);        if (exp->exp_nid_stats && exp->exp_nid_stats->nid_stats)                lprocfs_counter_add(exp->exp_nid_stats->nid_stats,                                    LPROC_FILTER_READ_BYTES, tot_bytes);        EXIT; cleanup:        if (rc != 0) {                filter_free_dio_pages(objcount, obj, niocount, res);                if (dentry != NULL)                        f_dput(dentry);        }        filter_iobuf_put(&obd->u.filter, iobuf, oti);        pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);        if (rc)                CERROR("io error %d\n", rc);        return rc;}/* When clients have dirtied as much space as they've been granted they * fall through to sync writes.  These sync writes haven't been expressed * in grants and need to error with ENOSPC when there isn't room in the * filesystem for them after grants are taken into account.  However, * writeback of the dirty data that was already granted space can write * right on through. * * Caller must hold obd_osfs_lock. 
*/
/*
 * Decide, buffer by buffer, whether an incoming bulk write may proceed, and
 * reserve the space it needs.
 *
 * Each remote niobuf is rounded out to whole filesystem blocks and then
 * charged, in order of preference, against:
 *   1. the export's grant (fed_grant), when the buffer carries
 *      OBD_BRW_FROM_GRANT and the obdo carries OBD_MD_FLGRANT;
 *   2. the remaining free space passed in through *left.
 * A buffer that fits neither gets lnb[n].rc = -ENOSPC and has
 * OBD_BRW_GRANTED cleared in rnb[n].flags.
 *
 * \param exp       export whose exp_filter_data counters are charged
 * \param oa        obdo of the request; only o_valid is read here
 * \param objcount  number of entries in \a fso
 * \param fso       per-object info; only fso_bufcnt is read here
 * \param niocount  not referenced in this function
 * \param rnb       remote niobufs, indexed across all objects; flags are
 *                  updated in place
 * \param lnb       local niobufs, parallel to \a rnb; lnb_grant_used or rc
 *                  is set for each entry
 * \param left      in/out: free space still available; reduced by the
 *                  bytes accepted without a grant
 * \param inode     not referenced in this function
 *
 * \retval 0        at least one buffer was accepted (from grant or from
 *                  free space); individual lnb[n].rc values may still be
 *                  -ENOSPC
 * \retval -ENOSPC  no buffer was accepted
 *
 * Side effects visible in this function: updates fed_grant, fed_pending and
 * fed_dirty on the export, fo_tot_granted, fo_tot_pending and fo_tot_dirty
 * on the filter, and obd_osfs.os_bavail on the obd.
 */
static int filter_grant_check(struct obd_export *exp, struct obdo *oa,
                              int objcount, struct fsfilt_objinfo *fso,
                              int niocount, struct niobuf_remote *rnb,
                              struct niobuf_local *lnb, obd_size *left,
                              struct inode *inode)
{
        struct filter_export_data *fed = &exp->exp_filter_data;
        int blocksize = exp->exp_obd->u.obt.obt_sb->s_blocksize;
        /* used: bytes charged to the client's grant; ungranted: bytes
         * charged to free space; using: the sum expressed in blocks. */
        unsigned long used = 0, ungranted = 0, using;
        /* rc stays -ENOSPC until at least one buffer is accepted; n is the
         * running index into rnb[]/lnb[] across all objects. */
        int i, rc = -ENOSPC, obj, n = 0;

        LASSERT_SPIN_LOCKED(&exp->exp_obd->obd_osfs_lock);

        for (obj = 0; obj < objcount; obj++) {
                for (i = 0; i < fso[obj].fso_bufcnt; i++, n++) {
                        int tmp, bytes;

                        /* should match the code in osc_exit_cache */
                        /* Widen [offset, offset + len) to block boundaries:
                         * add the distance from the start of the first
                         * block, then pad the tail up to the end of the
                         * last block.  The masking assumes blocksize is a
                         * power of two -- TODO confirm. */
                        bytes = rnb[n].len;
                        bytes += rnb[n].offset & (blocksize - 1);
                        tmp = (rnb[n].offset + rnb[n].len) & (blocksize - 1);
                        if (tmp)
                                bytes += blocksize - tmp;

                        if ((rnb[n].flags & OBD_BRW_FROM_GRANT) &&
                            (oa->o_valid & OBD_MD_FLGRANT)) {
                                if (fed->fed_grant < used + bytes) {
                                        /* The client claims more grant than
                                         * is recorded for it; only log, and
                                         * fall through to the free-space
                                         * check below.
                                         * NOTE(review): 'used' is unsigned
                                         * long but is printed with %ld. */
                                        CDEBUG(D_CACHE,
                                               "%s: cli %s/%p claims %ld+%d "
                                               "GRANT, real grant %lu idx %d\n",
                                               exp->exp_obd->obd_name,
                                               exp->exp_client_uuid.uuid, exp,
                                               used, bytes, fed->fed_grant, n);
                                } else {
                                        /* Covered by the client's grant. */
                                        used += bytes;
                                        rnb[n].flags |= OBD_BRW_GRANTED;
                                        lnb[n].lnb_grant_used = bytes;
                                        CDEBUG(0, "idx %d used=%lu\n", n, used);
                                        rc = 0;
                                        continue;
                                }
                        }
                        if (*left > ungranted + bytes) {
                                /* if enough space, pretend it was granted */
                                ungranted += bytes;
                                rnb[n].flags |= OBD_BRW_GRANTED;
                                lnb[n].lnb_grant_used = bytes;
                                CDEBUG(0, "idx %d ungranted=%lu\n",n,ungranted);
                                rc = 0;
                                continue;
                        }

                        /* We can't check for already-mapped blocks here, as
                         * it requires dropping the osfs lock to do the bmap.
                         * Instead, we return ENOSPC and in that case we need
                         * to go through and verify if all of the blocks not
                         * marked BRW_GRANTED are already mapped and we can
                         * ignore this error. */
                        lnb[n].rc = -ENOSPC;
                        rnb[n].flags &= ~OBD_BRW_GRANTED;
                        CDEBUG(D_CACHE,"%s: cli %s/%p idx %d no space for %d\n",
                               exp->exp_obd->obd_name,
                               exp->exp_client_uuid.uuid, exp, n, bytes);
                }
        }

        /* Now subtract what the client has used already.  We don't subtract
         * this from the tot_granted yet, so that other clients can't grab
         * that space before we have actually allocated our blocks.  That
         * happens in filter_grant_commit() after the writes are done. */
        *left -= ungranted;
        fed->fed_grant -= used;
        fed->fed_pending += used + ungranted;
        exp->exp_obd->u.filter.fo_tot_granted += ungranted;
        exp->exp_obd->u.filter.fo_tot_pending += used + ungranted;

        CDEBUG(D_CACHE,
               "%s: cli %s/%p used: %lu ungranted: %lu grant: %lu dirty: %lu\n",
               exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, used,
               ungranted, fed->fed_grant, fed->fed_dirty);

        /* Rough calc in case we don't refresh cached statfs data */
        /* Shift the byte total by s_blocksize_bits and take the result off
         * the cached os_bavail, clamping at zero.
         * NOTE(review): the "+ 1" does not round up to a whole block --
         * confirm whether that is intended. */
        using = (used + ungranted + 1 ) >>
                exp->exp_obd->u.obt.obt_sb->s_blocksize_bits;
        if (exp->exp_obd->obd_osfs.os_bavail > using)
                exp->exp_obd->obd_osfs.os_bavail -= using;
        else
                exp->exp_obd->obd_osfs.os_bavail = 0;

        /* Never take more off the dirty counters than this export has
         * recorded: log the discrepancy and clamp 'used' to fed_dirty. */
        if (fed->fed_dirty < used) {
                CERROR("%s: cli %s/%p claims used %lu > fed_dirty %lu\n",
                       exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp,
                       used, fed->fed_dirty);
                used = fed->fed_dirty;
        }
        exp->exp_obd->u.filter.fo_tot_dirty -= used;
        fed->fed_dirty -= used;

        /* A negative per-export counter is treated as fatal: the osfs lock
         * (held by the caller) is released before LBUG() is invoked. */
        if (fed->fed_dirty < 0 || fed->fed_grant < 0 || fed->fed_pending < 0) {
                CERROR("%s: cli %s/%p dirty %ld pend %ld grant %ld\n",
                       exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp,
                       fed->fed_dirty, fed->fed_pending, fed->fed_grant);
                spin_unlock(&exp->exp_obd->obd_osfs_lock);
                LBUG();
        }
        return rc;
}

/* If we ever start to support multi-object BRW RPCs, we will need to get locks
 * on multiple inodes.  That isn't all, because there still exists the
 * possibility of a truncate starting a new transaction while holding the ext3
 * rwsem = write while some writes (which have started their transactions here)
 * blocking on the ext3 rwsem = read => lock inversion.
* * The handling gets very ugly when dealing with locked pages.  It may be easier * to just get rid of the locked page code (which has problems of its own) and * either discover we do not need it anymore (i.e. it was a symptom of another * bug) or ensure we get the page locks in an appropriate order. */static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa,                               int objcount, struct obd_ioobj *obj,                               int niocount, struct niobuf_remote *nb,                               struct niobuf_local *res,                               struct obd_trans_info *oti){        struct lvfs_run_ctxt saved;        struct niobuf_remote *rnb;        struct niobuf_local *lnb = res;        struct fsfilt_objinfo fso;        struct filter_mod_data *fmd;        struct dentry *dentry = NULL;        void *iobuf;        obd_size left;        unsigned long now = jiffies;        int rc = 0, i, tot_bytes = 0, cleanup_phase = 0;        ENTRY;        LASSERT(objcount == 1);        LASSERT(obj->ioo_bufcnt > 0);        push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);        iobuf = filter_iobuf_get(&exp->exp_obd->u.filter, oti);        if (IS_ERR(iobuf))                GOTO(cleanup, rc = PTR_ERR(iobuf));        cleanup_phase = 1;        dentry = filter_fid2dentry(exp->exp_obd, NULL, obj->ioo_gr,                                   obj->ioo_id);        if (IS_ERR(dentry))                GOTO(cleanup, rc = PTR_ERR(dentry));        cleanup_phase = 2;        if (dentry->d_inode == NULL) {                CERROR("%s: trying to BRW to non-existent file "LPU64"\n",                       exp->exp_obd->obd_name, obj->ioo_id);                GOTO(cleanup, rc = -ENOENT);        }        fso.fso_dentry = dentry;        fso.fso_bufcnt = obj->ioo_bufcnt;        fsfilt_check_slow(exp->exp_obd, now, "preprw_write setup");        /* Don't update inode timestamps if this write is older than a         * setattr which modifies the timestamps. 
b=10150 */        /* XXX when we start having persistent reservations this needs to         * be changed to filter_fmd_get() to create the fmd if it doesn't         * already exist so we can store the reservation handle there. */        fmd = filter_fmd_find(exp, obj->ioo_id, obj->ioo_gr);        LASSERT(oa != NULL);        spin_lock(&exp->exp_obd->obd_osfs_lock);         filter_grant_incoming(exp, oa);        if (fmd && fmd->fmd_mactime_xid > oti->oti_xid)                oa->o_valid &= ~(OBD_MD_FLMTIME | OBD_MD_FLCTIME |                                 OBD_MD_FLATIME);        else                obdo_to_inode(dentry->d_inode, oa, OBD_MD_FLATIME |                              OBD_MD_FLMTIME | OBD_MD_FLCTIME);        cleanup_phase = 3;        left = filter_grant_space_left(exp);        rc = filter_grant_check(exp, oa, objcount, &fso, niocount, nb, res,                                &left, dentry->d_inode);        /* do not zero out oa->o_valid as it is used in filter_commitrw_write()         * for setting UID/GID and fid EA in first write time. */        if (oa->o_valid & OBD_MD_FLGRANT)                oa->o_grant = filter_grant(exp,oa->o_grant,oa->o_undirty,left);        spin_unlock(&exp->exp_obd->obd_osfs_lock);        filter_fmd_put(exp, fmd);        if (rc)                GOTO(cleanup, rc);        for (i = 0, rnb = nb, lnb = res; i < obj->ioo_bufcnt;             i++, lnb++, rnb++) {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?