📄 lov_qos.c
字号:
__u64 total_bavail, total_weight = 0; __u32 ost_count; int nfound, good_osts, i, warn = 0, rc = 0; int stripe_cnt_min = min_stripe_count(*stripe_cnt, flags); ENTRY; if (stripe_cnt_min < 1) GOTO(out, rc = -EINVAL); lov_getref(exp->exp_obd); down_write(&lov->lov_qos.lq_rw_sem); ost_count = lov->desc.ld_tgt_count; if (lov->desc.ld_active_tgt_count < 2) GOTO(out, rc = -EAGAIN); rc = qos_calc_ppo(exp->exp_obd); if (rc) GOTO(out, rc); total_bavail = 0; good_osts = 0; /* Warn users about zero available space/inode every 30 min */ if (cfs_time_sub(now, last_warn) > 60 * 30) warn = 1; /* Find all the OSTs that are valid stripe candidates */ for (i = 0; i < ost_count; i++) { __u64 bavail; if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) continue; bavail = TGT_BAVAIL(i); if (!bavail) { if (warn) { CDEBUG(D_QOS, "no free space on %s\n", obd_uuid2str(&lov->lov_tgts[i]->ltd_uuid)); last_warn = now; } continue; } if (!TGT_FFREE(i)) { if (warn) { CDEBUG(D_QOS, "no free inodes on %s\n", obd_uuid2str(&lov->lov_tgts[i]->ltd_uuid)); last_warn = now; } continue; } /* Fail Check before osc_precreate() is called so we can only 'fail' single OSC. */ if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_PRECREATE) && i == 0) continue; if (obd_precreate(lov->lov_tgts[i]->ltd_exp) > 2) continue; lov->lov_tgts[i]->ltd_qos.ltq_usable = 1; qos_calc_weight(lov, i); total_bavail += bavail; total_weight += lov->lov_tgts[i]->ltd_qos.ltq_weight; good_osts++; } if (good_osts < stripe_cnt_min) GOTO(out, rc = -EAGAIN); if (!total_bavail) GOTO(out, rc = -ENOSPC); /* We have enough osts */ if (good_osts < *stripe_cnt) *stripe_cnt = good_osts; /* Find enough OSTs with weighted random allocation. */ nfound = 0; while (nfound < *stripe_cnt) { __u64 rand, cur_weight; cur_weight = 0; rc = -ENODEV; if (total_weight) {#if BITS_PER_LONG == 32 rand = ll_rand() % (unsigned)total_weight; /* If total_weight > 32-bit, first generate the high * 32 bits of the random number, then add in the low * 32 bits (truncated to the upper limit, if needed) */ if (total_weight > 0xffffffffULL) rand = (__u64)(ll_rand() % (unsigned)(total_weight >> 32)) << 32; else rand = 0; if (rand == (total_weight & 0xffffffff00000000ULL)) rand |= ll_rand() % (unsigned)total_weight; else rand |= ll_rand();#else rand = ((__u64)ll_rand() << 32 | ll_rand()) % total_weight;#endif } else { rand = 0; } /* On average, this will hit larger-weighted osts more often. 0-weight osts will always get used last (only when rand=0).*/ for (i = 0; i < ost_count; i++) { if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_qos.ltq_usable) continue; cur_weight += lov->lov_tgts[i]->ltd_qos.ltq_weight; if (cur_weight >= rand) {#ifdef QOS_DEBUG CDEBUG(D_QOS, "assigned stripe=%d to idx=%d\n", nfound, i);#endif idx_arr[nfound++] = i; qos_used(lov, i, &total_weight); rc = 0; break; } } /* should never satisfy below condition */ if (rc) { CERROR("Didn't find any OSTs?\n"); break; } } LASSERT(nfound == *stripe_cnt);out: up_write(&lov->lov_qos.lq_rw_sem); if (rc == -EAGAIN) rc = alloc_rr(lov, idx_arr, stripe_cnt, flags); lov_putref(exp->exp_obd); RETURN(rc);}/* return new alloced stripe count on success */static int alloc_idx_array(struct obd_export *exp, struct lov_stripe_md *lsm, int newea, int **idx_arr, int *arr_cnt, int flags){ struct lov_obd *lov = &exp->exp_obd->u.lov; int stripe_cnt = lsm->lsm_stripe_count; int i, rc = 0; int *tmp_arr = NULL; ENTRY; *arr_cnt = stripe_cnt; OBD_ALLOC(tmp_arr, *arr_cnt * sizeof(int)); if (tmp_arr == NULL) RETURN(-ENOMEM); for (i = 0; i < *arr_cnt; i++) tmp_arr[i] = -1; if (newea || lsm->lsm_oinfo[0]->loi_ost_idx >= lov->desc.ld_tgt_count) rc = alloc_qos(exp, tmp_arr, &stripe_cnt, flags); else rc = alloc_specific(lov, lsm, tmp_arr); if (rc) GOTO(out_arr, rc); *idx_arr = tmp_arr; RETURN(stripe_cnt);out_arr: OBD_FREE(tmp_arr, *arr_cnt * sizeof(int)); *arr_cnt = 0; RETURN(rc);}static void free_idx_array(int *idx_arr, int arr_cnt){ if (arr_cnt) OBD_FREE(idx_arr, arr_cnt * sizeof(int));}int qos_prep_create(struct obd_export *exp, struct lov_request_set *set){ struct lov_obd *lov = &exp->exp_obd->u.lov; struct lov_stripe_md *lsm; struct obdo *src_oa = set->set_oi->oi_oa; struct obd_trans_info *oti = set->set_oti; int i, stripes, rc = 0, newea = 0; int flag = LOV_USES_ASSIGNED_STRIPE; int *idx_arr = NULL, idx_cnt = 0; ENTRY; LASSERT(src_oa->o_valid & OBD_MD_FLID); if (set->set_oi->oi_md == NULL) { int stripes_def = lov_get_stripecnt(lov, 0); /* If the MDS file was truncated up to some size, stripe over * enough OSTs to allow the file to be created at that size. * This may mean we use more than the default # of stripes. */ if (src_oa->o_valid & OBD_MD_FLSIZE) { obd_size min_bavail = LUSTRE_STRIPE_MAXBYTES; /* Find a small number of stripes we can use (up to # of active osts). */ stripes = 1; lov_getref(exp->exp_obd); for (i = 0; i < lov->desc.ld_tgt_count; i++) { if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) continue; min_bavail = min(min_bavail, TGT_BAVAIL(i)); if (min_bavail * stripes > src_oa->o_size) break; stripes++; } lov_putref(exp->exp_obd); if (stripes < stripes_def) stripes = stripes_def; } else { flag = LOV_USES_DEFAULT_STRIPE; stripes = stripes_def; } rc = lov_alloc_memmd(&set->set_oi->oi_md, stripes, lov->desc.ld_pattern ? lov->desc.ld_pattern : LOV_PATTERN_RAID0, LOV_MAGIC); if (rc < 0) GOTO(out_err, rc); newea = 1; rc = 0; } lsm = set->set_oi->oi_md; lsm->lsm_object_id = src_oa->o_id; if (!lsm->lsm_stripe_size) lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size; if (!lsm->lsm_pattern) { LASSERT(lov->desc.ld_pattern); lsm->lsm_pattern = lov->desc.ld_pattern; } stripes = alloc_idx_array(exp, lsm, newea, &idx_arr, &idx_cnt, flag); if (stripes <= 0) GOTO(out_err, rc = stripes ? stripes : -EIO); LASSERTF(stripes <= lsm->lsm_stripe_count,"requested %d allocated %d\n", lsm->lsm_stripe_count, stripes); for (i = 0; i < stripes; i++) { struct lov_request *req; int ost_idx = idx_arr[i]; LASSERT(ost_idx >= 0); OBD_ALLOC(req, sizeof(*req)); if (req == NULL) GOTO(out_err, rc = -ENOMEM); lov_set_add_req(req, set); req->rq_buflen = sizeof(*req->rq_oi.oi_md); OBD_ALLOC(req->rq_oi.oi_md, req->rq_buflen); if (req->rq_oi.oi_md == NULL) GOTO(out_err, rc = -ENOMEM); OBDO_ALLOC(req->rq_oi.oi_oa); if (req->rq_oi.oi_oa == NULL) GOTO(out_err, rc = -ENOMEM); req->rq_idx = ost_idx; req->rq_stripe = i; /* create data objects with "parent" OA */ memcpy(req->rq_oi.oi_oa, src_oa, sizeof(*req->rq_oi.oi_oa)); /* XXX When we start creating objects on demand, we need to * make sure that we always create the object on the * stripe which holds the existing file size. */ if (src_oa->o_valid & OBD_MD_FLSIZE) { req->rq_oi.oi_oa->o_size = lov_size_to_stripe(lsm, src_oa->o_size, i); CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n", i, req->rq_oi.oi_oa->o_size, src_oa->o_size); } } LASSERT(set->set_count == stripes); if (stripes < lsm->lsm_stripe_count) qos_shrink_lsm(set); if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) { oti_alloc_cookies(oti, set->set_count); if (!oti->oti_logcookies) GOTO(out_err, rc = -ENOMEM); set->set_cookies = oti->oti_logcookies; }out_err: if (newea && rc) obd_free_memmd(exp, &set->set_oi->oi_md); free_idx_array(idx_arr, idx_cnt); EXIT; return rc;}void qos_update(struct lov_obd *lov){ ENTRY; lov->lov_qos.lq_dirty = 1;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -