📄 mds_lov.c
字号:
} case OBD_IOC_SET_READONLY: { void *handle; struct inode *inode = obd->u.obt.obt_sb->s_root->d_inode; BDEVNAME_DECLARE_STORAGE(tmp); LCONSOLE_WARN("*** setting obd %s device '%s' read-only ***\n", obd->obd_name, ll_bdevname(obd->u.obt.obt_sb, tmp)); handle = fsfilt_start(obd, inode, FSFILT_OP_MKNOD, NULL); if (!IS_ERR(handle)) rc = fsfilt_commit(obd, inode, handle, 1); CDEBUG(D_HA, "syncing mds %s\n", obd->obd_name); rc = fsfilt_sync(obd, obd->u.obt.obt_sb); lvfs_set_rdonly(obd, obd->u.obt.obt_sb); RETURN(0); } case OBD_IOC_CATLOGLIST: { int count = mds->mds_lov_desc.ld_tgt_count; rc = llog_catalog_list(obd, count, data); RETURN(rc); } case OBD_IOC_LLOG_CHECK: case OBD_IOC_LLOG_CANCEL: case OBD_IOC_LLOG_REMOVE: { struct llog_ctxt *ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); int rc2; obd_llog_finish(obd, mds->mds_lov_desc.ld_tgt_count); push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); rc = llog_ioctl(ctxt, cmd, data); pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, NULL); llog_ctxt_put(ctxt); rc2 = obd_set_info_async(mds->mds_osc_exp, sizeof(KEY_MDS_CONN), KEY_MDS_CONN, 0, NULL, NULL); if (!rc) rc = rc2; RETURN(rc); } case OBD_IOC_LLOG_INFO: case OBD_IOC_LLOG_PRINT: { struct llog_ctxt *ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); rc = llog_ioctl(ctxt, cmd, data); pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); llog_ctxt_put(ctxt); RETURN(rc); } case OBD_IOC_ABORT_RECOVERY: CERROR("aborting recovery for device %s\n", obd->obd_name); target_abort_recovery(obd); RETURN(0); default: CDEBUG(D_INFO, "unknown command %x\n", cmd); RETURN(-EINVAL); } RETURN(0);}/* Collect the preconditions we need to allow client connects */static void mds_allow_cli(struct obd_device *obd, unsigned long flag){ if (flag & CONFIG_LOG) obd->u.mds.mds_fl_cfglog = 1; if (flag & CONFIG_SYNC) 
obd->u.mds.mds_fl_synced = 1; if (obd->u.mds.mds_fl_cfglog /* bz11778: && obd->u.mds.mds_fl_synced */) /* Open for clients */ obd->obd_no_conn = 0;}struct mds_lov_sync_info { struct obd_device *mlsi_obd; /* the lov device to sync */ struct obd_device *mlsi_watched; /* target osc */ __u32 mlsi_index; /* index of target */};/* We only sync one osc at a time, so that we don't have to hold any kind of lock on the whole mds_lov_desc, which may change (grow) as a result of mds_lov_add_ost. This also avoids any kind of mismatch between the lov_desc and the mds_lov_desc, which are not in lock-step during lov_add_obd */static int __mds_lov_synchronize(void *data){ struct mds_lov_sync_info *mlsi = data; struct obd_device *obd = mlsi->mlsi_obd; struct obd_device *watched = mlsi->mlsi_watched; struct mds_obd *mds = &obd->u.mds; struct obd_uuid *uuid; __u32 idx = mlsi->mlsi_index; struct llog_ctxt *ctxt; int rc = 0; ENTRY; OBD_FREE(mlsi, sizeof(*mlsi)); LASSERT(obd); LASSERT(watched); uuid = &watched->u.cli.cl_target_uuid; LASSERT(uuid); OBD_RACE(OBD_FAIL_MDS_LOV_SYNC_RACE); rc = mds_lov_update_mds(obd, watched, idx); if (rc != 0) { CERROR("%s failed at update_mds: %d\n", obd_uuid2str(uuid), rc); GOTO(out, rc); } rc = obd_set_info_async(mds->mds_osc_exp, sizeof(KEY_MDS_CONN), KEY_MDS_CONN, 0, uuid, NULL); if (rc != 0) GOTO(out, rc); ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT); if (!ctxt) RETURN(-ENODEV); OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT, 60); rc = llog_connect(ctxt, obd->u.mds.mds_lov_desc.ld_tgt_count, NULL, NULL, uuid); llog_ctxt_put(ctxt); if (rc != 0) { CERROR("%s failed at llog_origin_connect: %d\n", obd_uuid2str(uuid), rc); GOTO(out, rc); } LCONSOLE_INFO("MDS %s: %s now active, resetting orphans\n", obd->obd_name, obd_uuid2str(uuid)); if (obd->obd_stopping) GOTO(out, rc = -ENODEV); rc = mds_lov_clear_orphans(mds, uuid); if (rc != 0) { CERROR("%s failed at mds_lov_clear_orphans: %d\n", obd_uuid2str(uuid), rc); GOTO(out, rc); } EXIT;out: if (rc) { /* 
Deactivate it for safety */ CERROR("%s sync failed %d, deactivating\n", obd_uuid2str(uuid), rc); if (!obd->obd_stopping && mds->mds_osc_obd && !mds->mds_osc_obd->obd_stopping && !watched->obd_stopping) obd_notify(mds->mds_osc_obd, watched, OBD_NOTIFY_INACTIVE, NULL); } else { /* We've successfully synced at least 1 OST and are ready to handle client requests */ mds_allow_cli(obd, CONFIG_SYNC); } class_decref(obd); return rc;}int mds_lov_synchronize(void *data){ struct mds_lov_sync_info *mlsi = data; char name[20]; snprintf(name, sizeof(name), "ll_sync_%02u", mlsi->mlsi_index); ptlrpc_daemonize(name); RETURN(__mds_lov_synchronize(data));}int mds_lov_start_synchronize(struct obd_device *obd, struct obd_device *watched, void *data, int nonblock){ struct mds_lov_sync_info *mlsi; struct mds_obd *mds = &obd->u.mds; int rc; struct obd_uuid *uuid; ENTRY; LASSERT(watched); uuid = &watched->u.cli.cl_target_uuid; OBD_ALLOC(mlsi, sizeof(*mlsi)); if (mlsi == NULL) RETURN(-ENOMEM); mlsi->mlsi_obd = obd; mlsi->mlsi_watched = watched; if (data) mlsi->mlsi_index = *(__u32 *)data; else mlsi->mlsi_index = mds_lov_get_idx(mds->mds_osc_exp, uuid); /* Although class_export_get(obd->obd_self_export) would lock the MDS in place, since it's only a self-export it doesn't lock the LOV in place. The LOV can be disconnected during MDS precleanup, leaving nothing for __mds_lov_synchronize. Simply taking an export ref on the LOV doesn't help, because it's still disconnected. Taking an obd reference insures that we don't disconnect the LOV. This of course means a cleanup won't finish for as long as the sync is blocking. 
*/ class_incref(obd); if (nonblock) { /* Synchronize in the background */ rc = cfs_kernel_thread(mds_lov_synchronize, mlsi, CLONE_VM | CLONE_FILES); if (rc < 0) { CERROR("%s: error starting mds_lov_synchronize: %d\n", obd->obd_name, rc); class_decref(obd); } else { CDEBUG(D_HA, "%s: mds_lov_synchronize idx=%d " "thread=%d\n", obd->obd_name, mlsi->mlsi_index, rc); rc = 0; } } else { rc = __mds_lov_synchronize((void *)mlsi); } RETURN(rc);}int mds_notify(struct obd_device *obd, struct obd_device *watched, enum obd_notify_event ev, void *data){ int rc = 0; ENTRY; switch (ev) { /* We only handle these: */ case OBD_NOTIFY_ACTIVE: case OBD_NOTIFY_SYNC: case OBD_NOTIFY_SYNC_NONBLOCK: break; case OBD_NOTIFY_CONFIG: mds_allow_cli(obd, (unsigned long)data); default: RETURN(0); } CDEBUG(D_CONFIG, "notify %s ev=%d\n", watched->obd_name, ev); if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME) != 0) { CERROR("unexpected notification of %s %s!\n", watched->obd_type->typ_name, watched->obd_name); RETURN(-EINVAL); } if (obd->obd_recovering) { CWARN("MDS %s: in recovery, not resetting orphans on %s\n", obd->obd_name, obd_uuid2str(&watched->u.cli.cl_target_uuid)); /* We still have to fix the lov descriptor for ost's added after the mdt in the config log. They didn't make it into mds_lov_connect. 
*/ mutex_down(&obd->obd_dev_sem); rc = mds_lov_update_desc(obd, obd->u.mds.mds_osc_exp); mutex_up(&obd->obd_dev_sem); mds_allow_cli(obd, CONFIG_SYNC); RETURN(rc); } LASSERT(!llog_ctxt_null(obd, LLOG_MDS_OST_ORIG_CTXT)); rc = mds_lov_start_synchronize(obd, watched, data, !(ev == OBD_NOTIFY_SYNC)); lquota_recovery(mds_quota_interface_ref, obd); RETURN(rc);}int mds_get_default_md(struct obd_device *obd, struct lov_mds_md *lmm, int *size){ struct lov_desc *ldesc; ENTRY; ldesc = &obd->u.mds.mds_lov_desc; LASSERT(ldesc != NULL); if (!lmm) RETURN(0); lmm->lmm_magic = LOV_MAGIC_V1; lmm->lmm_object_gr = LOV_OBJECT_GROUP_DEFAULT; lmm->lmm_pattern = ldesc->ld_pattern; lmm->lmm_stripe_size = ldesc->ld_default_stripe_size; lmm->lmm_stripe_count = ldesc->ld_default_stripe_count; *size = sizeof(struct lov_mds_md); RETURN(sizeof(struct lov_mds_md));}/* Convert the on-disk LOV EA structre. * We always try to convert from an old LOV EA format to the common in-memory * (lsm) format (obd_unpackmd() understands the old on-disk (lmm) format) and * then convert back to the new on-disk format and save it back to disk * (obd_packmd() only ever saves to the new on-disk format) so we don't have * to convert it each time this inode is accessed. * * This function is a bit interesting in the error handling. We can safely * ship the old lmm to the client in case of failure, since it uses the same * obd_unpackmd() code and can do the conversion if the MDS fails for some * reason. We will not delete the old lmm data until we have written the * new format lmm data in fsfilt_set_md(). 
*/
/* Returns 0 when the EA is already in a current format (nothing written),
 * the size of the newly written EA on success, or a negative errno from
 * unpack/pack, transaction start, EA write or commit. */
int mds_convert_lov_ea(struct obd_device *obd, struct inode *inode,
                       struct lov_mds_md *lmm, int lmm_size)
{
        struct lov_stripe_md *lsm = NULL;
        void *handle;
        int rc, err;
        ENTRY;

        /* lmm_magic is stored little-endian: convert it to CPU order
         * BEFORE comparing.  (Byte-swapping the result of the comparison
         * instead only works by accident on little-endian hosts.) */
        if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC ||
            le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_JOIN)
                RETURN(0);

        CDEBUG(D_INODE, "converting LOV EA on %lu/%u from %#08x to %#08x\n",
               inode->i_ino, inode->i_generation, le32_to_cpu(lmm->lmm_magic),
               LOV_MAGIC);

        /* Old on-disk format -> in-memory lsm. */
        rc = obd_unpackmd(obd->u.mds.mds_osc_exp, &lsm, lmm, lmm_size);
        if (rc < 0)
                GOTO(conv_end, rc);

        /* In-memory lsm -> on-disk format; the return value is the
         * packed size. */
        rc = obd_packmd(obd->u.mds.mds_osc_exp, &lmm, lsm);
        if (rc < 0)
                GOTO(conv_free, rc);
        lmm_size = rc;

        handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL);
        if (IS_ERR(handle)) {
                rc = PTR_ERR(handle);
                GOTO(conv_free, rc);
        }

        rc = fsfilt_set_md(obd, inode, handle, lmm, lmm_size, "lov");

        /* Always commit the transaction; a set_md error takes precedence
         * over a commit error in the value returned. */
        err = fsfilt_commit(obd, inode, handle, 0);
        if (!rc)
                rc = err ? err : lmm_size;
        GOTO(conv_free, rc);
conv_free:
        obd_free_memmd(obd->u.mds.mds_osc_exp, &lsm);
conv_end:
        return rc;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -