📄 lov_qos.c
字号:
#define LOV_QOS_EMPTY ((__u32)-1)/* compute optimal round-robin order, based on OSTs per OSS */static int qos_calc_rr(struct lov_obd *lov){ struct lov_qos_oss *oss; unsigned ost_count, placed, real_count; int i; ENTRY; if (!lov->lov_qos.lq_dirty_rr) { LASSERT(lov->lov_qos.lq_rr_size); RETURN(0); } down_write(&lov->lov_qos.lq_rw_sem); ost_count = lov->desc.ld_tgt_count; if (lov->lov_qos.lq_rr_size) OBD_FREE(lov->lov_qos.lq_rr_array, lov->lov_qos.lq_rr_size); lov->lov_qos.lq_rr_size = ost_count * sizeof(lov->lov_qos.lq_rr_array[0]); OBD_ALLOC(lov->lov_qos.lq_rr_array, lov->lov_qos.lq_rr_size); if (!lov->lov_qos.lq_rr_array) { lov->lov_qos.lq_rr_size = 0; up_write(&lov->lov_qos.lq_rw_sem); RETURN(-ENOMEM); } real_count = 0; for (i = 0; i < ost_count; i++) { lov->lov_qos.lq_rr_array[i] = LOV_QOS_EMPTY; if (lov->lov_tgts[i]) real_count++; } /* Place all the OSTs from 1 OSS at the same time. */ placed = 0; list_for_each_entry(oss, &lov->lov_qos.lq_oss_list, lqo_oss_list) { int j = 0; for (i = 0; i < ost_count; i++) { if (lov->lov_tgts[i] && (lov->lov_tgts[i]->ltd_qos.ltq_oss == oss)) { /* Evenly space these OSTs across arrayspace */ int next = j * ost_count / oss->lqo_ost_count; while (lov->lov_qos.lq_rr_array[next] != LOV_QOS_EMPTY) next = (next + 1) % ost_count; lov->lov_qos.lq_rr_array[next] = i; j++; placed++; } } LASSERT(j == oss->lqo_ost_count); } lov->lov_qos.lq_dirty_rr = 0; up_write(&lov->lov_qos.lq_rw_sem); if (placed != real_count) { /* This should never happen */ LCONSOLE_ERROR_MSG(0x14e, "Failed to place all OSTs in the " "round-robin list (%d of %d).\n", placed, real_count); for (i = 0; i < ost_count; i++) { LCONSOLE(D_WARNING, "rr #%d ost idx=%d\n", i, lov->lov_qos.lq_rr_array[i]); } lov->lov_qos.lq_dirty_rr = 1; RETURN(-EAGAIN); }#ifdef QOS_DEBUG for (i = 0; i < ost_count; i++) { LCONSOLE(D_QOS, "rr #%d ost idx=%d\n", i, lov->lov_qos.lq_rr_array[i]); }#endif RETURN(0);}void qos_shrink_lsm(struct lov_request_set *set){ struct lov_stripe_md *lsm = set->set_oi->oi_md, *lsm_new; /* XXX LOV STACKING call into osc for sizes */ unsigned oldsize, newsize; if (set->set_oti && set->set_cookies && set->set_cookie_sent) { struct llog_cookie *cookies; oldsize = lsm->lsm_stripe_count * sizeof(*cookies); newsize = set->set_count * sizeof(*cookies); cookies = set->set_cookies; oti_alloc_cookies(set->set_oti, set->set_count); if (set->set_oti->oti_logcookies) { memcpy(set->set_oti->oti_logcookies, cookies, newsize); OBD_FREE(cookies, oldsize); set->set_cookies = set->set_oti->oti_logcookies; } else { CWARN("'leaking' %d bytes\n", oldsize - newsize); } } CWARN("using fewer stripes for object "LPU64": old %u new %u\n", lsm->lsm_object_id, lsm->lsm_stripe_count, set->set_count); LASSERT(lsm->lsm_stripe_count >= set->set_count); newsize = lov_stripe_md_size(set->set_count); OBD_ALLOC(lsm_new, newsize); if (lsm_new != NULL) { int i; memcpy(lsm_new, lsm, sizeof(*lsm)); for (i = 0; i < lsm->lsm_stripe_count; i++) { if (i < set->set_count) { lsm_new->lsm_oinfo[i] = lsm->lsm_oinfo[i]; continue; } OBD_SLAB_FREE(lsm->lsm_oinfo[i], lov_oinfo_slab, sizeof(struct lov_oinfo)); } lsm_new->lsm_stripe_count = set->set_count; OBD_FREE(lsm, sizeof(struct lov_stripe_md) + lsm->lsm_stripe_count * sizeof(struct lov_oinfo *)); set->set_oi->oi_md = lsm_new; } else { CWARN("'leaking' few bytes\n"); }}int qos_remedy_create(struct lov_request_set *set, struct lov_request *req){ struct lov_stripe_md *lsm = set->set_oi->oi_md; struct lov_obd *lov = &set->set_exp->exp_obd->u.lov; unsigned ost_idx, ost_count = lov->desc.ld_tgt_count; int stripe, i, rc = -EIO; ENTRY; ost_idx = (req->rq_idx + lsm->lsm_stripe_count) % ost_count; for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) { if (!lov->lov_tgts[ost_idx] || !lov->lov_tgts[ost_idx]->ltd_active) continue; /* check if objects has been created on this ost */ for (stripe = 0; stripe < lsm->lsm_stripe_count; stripe++) { if (stripe == req->rq_stripe) continue; if (ost_idx == lsm->lsm_oinfo[stripe]->loi_ost_idx) break; } if (stripe >= lsm->lsm_stripe_count) { req->rq_idx = ost_idx; rc = obd_create(lov->lov_tgts[ost_idx]->ltd_exp, req->rq_oi.oi_oa, &req->rq_oi.oi_md, set->set_oti); if (!rc) break; } } RETURN(rc);}static int min_stripe_count(int stripe_cnt, int flags){ return (flags & LOV_USES_DEFAULT_STRIPE ? stripe_cnt - (stripe_cnt / 4) : stripe_cnt);}#define LOV_CREATE_RESEED_MULT 4#define LOV_CREATE_RESEED_MIN 1000/* Allocate objects on osts with round-robin algorithm */static int alloc_rr(struct lov_obd *lov, int *idx_arr, int *stripe_cnt, int flags){ unsigned array_idx, ost_count = lov->desc.ld_tgt_count; unsigned ost_active_count = lov->desc.ld_active_tgt_count; int i, *idx_pos; __u32 ost_idx; int ost_start_idx_temp; int speed = 0; int stripe_cnt_min = min_stripe_count(*stripe_cnt, flags); ENTRY; i = qos_calc_rr(lov); if (i) RETURN(i); if (--lov->lov_start_count <= 0) { lov->lov_start_idx = ll_rand() % ost_count; lov->lov_start_count = (LOV_CREATE_RESEED_MIN / max(ost_active_count, 1U) + LOV_CREATE_RESEED_MULT) * max(ost_active_count, 1U); } else if (stripe_cnt_min >= ost_active_count || lov->lov_start_idx > ost_count) { /* If we have allocated from all of the OSTs, slowly * precess the next start if the OST/stripe count isn't * already doing this for us. */ lov->lov_start_idx %= ost_count; if (*stripe_cnt > 1 && (ost_active_count % (*stripe_cnt)) != 1) ++lov->lov_offset_idx; } down_read(&lov->lov_qos.lq_rw_sem); ost_start_idx_temp = lov->lov_start_idx;repeat_find : array_idx = (lov->lov_start_idx + lov->lov_offset_idx) % ost_count; idx_pos = idx_arr;#ifdef QOS_DEBUG CDEBUG(D_QOS, "want %d startidx %d startcnt %d offset %d active %d " "count %d arrayidx %d\n", stripe_cnt, lov->lov_start_idx, lov->lov_start_count, lov->lov_offset_idx, ost_active_count, ost_count, array_idx);#endif for (i = 0; i < ost_count; i++, array_idx=(array_idx + 1) % ost_count) { ++lov->lov_start_idx; ost_idx = lov->lov_qos.lq_rr_array[array_idx];#ifdef QOS_DEBUG CDEBUG(D_QOS, "#%d strt %d act %d strp %d ary %d idx %d\n", i, lov->lov_start_idx, ((ost_idx != LOV_QOS_EMPTY) && lov->lov_tgts[ost_idx]) ? lov->lov_tgts[ost_idx]->ltd_active : 0, idx_pos - idx_arr, array_idx, ost_idx);#endif if ((ost_idx == LOV_QOS_EMPTY) || !lov->lov_tgts[ost_idx] || !lov->lov_tgts[ost_idx]->ltd_active) continue; /* Fail Check before osc_precreate() is called so we can only 'fail' single OSC. */ if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_PRECREATE) && ost_idx == 0) continue; /* Drop slow OSCs if we can */ if (obd_precreate(lov->lov_tgts[ost_idx]->ltd_exp) > speed) continue; *idx_pos = ost_idx; idx_pos++; /* We have enough stripes */ if (idx_pos - idx_arr == *stripe_cnt) break; } if ((speed < 2) && (idx_pos - idx_arr < stripe_cnt_min)) { /* Try again, allowing slower OSCs */ speed++; lov->lov_start_idx = ost_start_idx_temp; goto repeat_find; } up_read(&lov->lov_qos.lq_rw_sem); *stripe_cnt = idx_pos - idx_arr; RETURN(0);}/* alloc objects on osts with specific stripe offset */static int alloc_specific(struct lov_obd *lov, struct lov_stripe_md *lsm, int *idx_arr){ unsigned ost_idx, ost_count = lov->desc.ld_tgt_count; int i, *idx_pos; int speed = 0; ENTRY;repeat_find: ost_idx = lsm->lsm_oinfo[0]->loi_ost_idx; idx_pos = idx_arr; for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) { if (!lov->lov_tgts[ost_idx] || !lov->lov_tgts[ost_idx]->ltd_active) { continue; } /* Fail Check before osc_precreate() is called so we can only 'fail' single OSC. */ if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_PRECREATE) && ost_idx == 0) continue; /* Drop slow OSCs if we can, but not for requested start idx */ if (obd_precreate(lov->lov_tgts[ost_idx]->ltd_exp) > speed && (i != 0 || speed < 2)) continue; *idx_pos = ost_idx; idx_pos++; /* We have enough stripes */ if (idx_pos - idx_arr == lsm->lsm_stripe_count) RETURN(0); } if (speed < 2) { /* Try again, allowing slower OSCs */ speed++; goto repeat_find; } /* If we were passed specific striping params, then a failure to * meet those requirements is an error, since we can't reallocate * that memory (it might be part of a larger array or something). * * We can only get here if lsm_stripe_count was originally > 1. */ CERROR("can't lstripe objid "LPX64": have %d want %u\n", lsm->lsm_object_id, (int)(idx_pos - idx_arr), lsm->lsm_stripe_count); RETURN(-EFBIG);}/* Alloc objects on osts with optimization based on: - free space - network resources (shared OSS's)*/static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt, int flags){ struct lov_obd *lov = &exp->exp_obd->u.lov; static time_t last_warn = 0; time_t now = cfs_time_current_sec();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -