ldlm_pool.c
来自「lustre 1.6.5 source code」· C语言 代码 · 共 1,146 行 · 第 1/3 页
C
1,146 行
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (c) 2007 Cluster File Systems, Inc. * Author: Yury Umanets <umka@clusterfs.com> * * This file is part of the Lustre file system, http://www.lustre.org * Lustre is a trademark of Cluster File Systems, Inc. * * You may have signed or agreed to another license before downloading * this software. If so, you are bound by the terms and conditions * of that agreement, and the following does not apply to you. See the * LICENSE file included with this distribution for more information. * * If you did not agree to a different license, then this copy of Lustre * is open source software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * In either case, Lustre is distributed in the hope that it will be * useful, but WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * license text for more details. *//* Idea of this code is rather simple. Each second, for each server namespace * we have SLV - server lock volume which is calculated on current number of * granted locks, grant speed for past period, etc - that is, locking load. * This SLV number may be thought as a flow definition for simplicity. It is * sent to clients with each occasion to let them know what is current load * situation on the server. By default, at the beginning, SLV on server is * set max value which is calculated as the following: allow to one client * have all locks of limit ->pl_limit for 10h. * * Next, on clients, number of cached locks is not limited artificially in any * way as it was before. Instead, client calculates CLV, that is, client lock * volume for each lock and compares it with last SLV from the server. CLV is * calculated as the number of locks in LRU * lock live time in seconds. If * CLV > SLV - lock is canceled. * * Client has LVF, that is, lock volume factor which regulates how much sensitive * client should be about last SLV from server. The higher LVF is the more locks * will be canceled on client. Default value for it is 1. Setting LVF to 2 means * that client will cancel locks 2 times faster. * * Locks on a client will be canceled more intensively in these cases: * (1) if SLV is smaller, that is, load is higher on the server; * (2) client has a lot of locks (the more locks are held by client, the bigger * chances that some of them should be canceled); * (3) client has old locks (taken some time ago); * * Thus, according to flow paradigm that we use for better understanding SLV, * CLV is the volume of particle in flow described by SLV. According to this, * if flow is getting thinner, more and more particles become outside of it and * as particles are locks, they should be canceled. * * General idea of this belongs to Vitaly Fertman (vitaly@clusterfs.com). Andreas * Dilger (adilger@clusterfs.com) proposed few nice ideas like using LVF and many * cleanups. Flow definition to allow more easy understanding of the logic belongs * to Nikita Danilov (nikita@clusterfs.com) as well as many cleanups and fixes. * And design and implementation are done by Yury Umanets (umka@clusterfs.com). * * Glossary for terms used: * * pl_limit - Number of allowed locks in pool. Applies to server and client * side (tunable); * * pl_granted - Number of granted locks (calculated); * pl_grant_rate - Number of granted locks for last T (calculated); * pl_cancel_rate - Number of canceled locks for last T (calculated); * pl_grant_speed - Grant speed (GR - CR) for last T (calculated); * pl_grant_plan - Planned number of granted locks for next T (calculated); * * pl_grant_step - Grant plan step, that is how ->pl_grant_plan * will change in next T (tunable); * * pl_server_lock_volume - Current server lock volume (calculated); * * As it may be seen from list above, we have few possible tunables which may * affect behavior much. They all may be modified via proc. However, they also * give a possibility for constructing few pre-defined behavior policies. If * none of predefines is suitable for a working pattern being used, new one may * be "constructed" via proc tunables. */#define DEBUG_SUBSYSTEM S_LDLM#ifdef __KERNEL__# include <lustre_dlm.h>#else# include <liblustre.h># include <libcfs/kp30.h>#endif#include <obd_class.h>#include <obd_support.h>#include "ldlm_internal.h"#ifdef HAVE_LRU_RESIZE_SUPPORT/* 50 ldlm locks for 1MB of RAM. */#define LDLM_POOL_HOST_L ((num_physpages >> (20 - PAGE_SHIFT)) * 50)/* Default step in % for grant plan. */#define LDLM_POOL_GSP (10)/* LDLM_POOL_GSP% of all locks is default GP. */#define LDLM_POOL_GP(L) (((L) * LDLM_POOL_GSP) / 100)/* Max age for locks on clients. */#define LDLM_POOL_MAX_AGE (36000)#ifdef __KERNEL__extern cfs_proc_dir_entry_t *ldlm_ns_proc_dir;#endif#define avg(src, add) \ ((src) = ((src) + (add)) / 2)static inline __u64 dru(__u64 val, __u32 div){ __u64 ret = val + (div - 1); do_div(ret, div); return ret;}static inline __u64 ldlm_pool_slv_max(__u32 L){ /* Allow to have all locks for 1 client for 10 hrs. * Formula is the following: limit * 10h / 1 client. */ __u64 lim = L * LDLM_POOL_MAX_AGE / 1; return lim;}static inline __u64 ldlm_pool_slv_min(__u32 L){ return 1;}enum { LDLM_POOL_FIRST_STAT = 0, LDLM_POOL_GRANTED_STAT = LDLM_POOL_FIRST_STAT, LDLM_POOL_GRANT_STAT, LDLM_POOL_CANCEL_STAT, LDLM_POOL_GRANT_RATE_STAT, LDLM_POOL_CANCEL_RATE_STAT, LDLM_POOL_GRANT_PLAN_STAT, LDLM_POOL_SLV_STAT, LDLM_POOL_SHRINK_REQTD_STAT, LDLM_POOL_SHRINK_FREED_STAT, LDLM_POOL_RECALC_STAT, LDLM_POOL_TIMING_STAT, LDLM_POOL_LAST_STAT};static inline struct ldlm_namespace *ldlm_pl2ns(struct ldlm_pool *pl){ return container_of(pl, struct ldlm_namespace, ns_pool);}/* Should be called under ->pl_lock taken */static inline void ldlm_pool_recalc_grant_plan(struct ldlm_pool *pl){ int granted, grant_step, limit; limit = ldlm_pool_get_limit(pl); granted = atomic_read(&pl->pl_granted); grant_step = ((limit - granted) * pl->pl_grant_step) / 100; pl->pl_grant_plan = granted + grant_step;}/* Should be called under ->pl_lock taken */static inline void ldlm_pool_recalc_slv(struct ldlm_pool *pl){ int grant_usage, granted, grant_plan; __u64 slv, slv_factor; __u32 limit; slv = ldlm_pool_get_slv(pl); grant_plan = pl->pl_grant_plan; limit = ldlm_pool_get_limit(pl); granted = atomic_read(&pl->pl_granted); grant_usage = limit - (granted - grant_plan); if (grant_usage <= 0) grant_usage = 1; /* Find out SLV change factor which is the ratio of grant usage * from limit. SLV changes as fast as the ratio of grant plan * consumtion. The more locks from grant plan are not consumed * by clients in last interval (idle time), the faster grows * SLV. And the opposite, the more grant plan is over-consumed * (load time) the faster drops SLV. */ slv_factor = (grant_usage * 100) / limit; if (2 * abs(granted - limit) > limit) { slv_factor *= slv_factor; slv_factor = dru(slv_factor, 100); } slv = slv * slv_factor; slv = dru(slv, 100); if (slv > ldlm_pool_slv_max(limit)) { slv = ldlm_pool_slv_max(limit); } else if (slv < ldlm_pool_slv_min(limit)) { slv = ldlm_pool_slv_min(limit); } ldlm_pool_set_slv(pl, slv);}static inline void ldlm_pool_recalc_stats(struct ldlm_pool *pl){ __u64 slv = ldlm_pool_get_slv(pl); int grant_plan = pl->pl_grant_plan; int granted = atomic_read(&pl->pl_granted); int grant_rate = atomic_read(&pl->pl_grant_rate); int cancel_rate = atomic_read(&pl->pl_cancel_rate); lprocfs_counter_add(pl->pl_stats, LDLM_POOL_SLV_STAT, slv); lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANTED_STAT, granted); lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANT_RATE_STAT, grant_rate); lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANT_PLAN_STAT, grant_plan); lprocfs_counter_add(pl->pl_stats, LDLM_POOL_CANCEL_RATE_STAT, cancel_rate);}static int ldlm_srv_pool_recalc(struct ldlm_pool *pl){ time_t recalc_interval_sec; ENTRY; spin_lock(&pl->pl_lock); recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time; if (recalc_interval_sec > 0) { /* Update statistics */ ldlm_pool_recalc_stats(pl); /* Recalc SLV after last period. This should be done * _before_ recalculating new grant plan. */ ldlm_pool_recalc_slv(pl); /* Update grant_plan for new period. */ ldlm_pool_recalc_grant_plan(pl); /* Zero out all rates and speed for the last period. */ atomic_set(&pl->pl_grant_rate, 0); atomic_set(&pl->pl_cancel_rate, 0); atomic_set(&pl->pl_grant_speed, 0); pl->pl_recalc_time = cfs_time_current_sec(); lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT, recalc_interval_sec); } spin_unlock(&pl->pl_lock); RETURN(0);}/* Our goal here is to decrease SLV the way to make a client hold * @nr locks smaller in next 10h. */static int ldlm_srv_pool_shrink(struct ldlm_pool *pl, int nr, unsigned int gfp_mask){ __u32 limit; ENTRY; /* VM is asking how many entries may be potentially freed. */ if (nr == 0) RETURN(atomic_read(&pl->pl_granted)); /* Client already canceled locks but server is already in shrinker * and can't cancel anything. Let's catch this race. */ if (atomic_read(&pl->pl_granted) == 0) RETURN(0); spin_lock(&pl->pl_lock); /* We want shrinker to possibly cause cancelation of @nr locks from * clients or grant approximately @nr locks smaller next intervals. * * This is why we decresed SLV by @nr. This effect will only be as * long as one re-calc interval (1s these days) and this should be * enough to pass this decreased SLV to all clients. On next recalc * interval pool will either increase SLV if locks load is not high * or will keep on same level or even decrease again, thus, shrinker * decreased SLV will affect next recalc intervals and this way will * make locking load lower. */ if (nr < ldlm_pool_get_slv(pl)) { ldlm_pool_set_slv(pl, ldlm_pool_get_slv(pl) - nr); } else { limit = ldlm_pool_get_limit(pl); ldlm_pool_set_slv(pl, ldlm_pool_slv_min(limit)); } spin_unlock(&pl->pl_lock); /* We did not really free any memory here so far, it only will be * freed later may be, so that we return 0 to not confuse VM. */ RETURN(0);}static int ldlm_srv_pool_setup(struct ldlm_pool *pl, int limit){ ENTRY; ldlm_pool_set_limit(pl, limit); RETURN(0);}static int ldlm_cli_pool_recalc(struct ldlm_pool *pl){ time_t recalc_interval_sec; ENTRY; spin_lock(&pl->pl_lock); recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time; if (recalc_interval_sec > 0) { /* Update statistics only every T */ ldlm_pool_recalc_stats(pl); /* Zero out grant/cancel rates and speed for last period. */ atomic_set(&pl->pl_grant_rate, 0); atomic_set(&pl->pl_cancel_rate, 0); atomic_set(&pl->pl_grant_speed, 0); pl->pl_recalc_time = cfs_time_current_sec(); lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT, recalc_interval_sec); } spin_unlock(&pl->pl_lock); /* Do not cancel locks in case lru resize is disabled for this ns */ if (!ns_connect_lru_resize(ldlm_pl2ns(pl))) RETURN(0); /* In the time of canceling locks on client we do not need to maintain * sharp timing, we only want to cancel locks asap according to new SLV. * This may be called when SLV has changed much, this is why we do not * take into account pl->pl_recalc_time here. */ RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LDLM_ASYNC, LDLM_CANCEL_LRUR));}static int ldlm_cli_pool_shrink(struct ldlm_pool *pl, int nr, unsigned int gfp_mask){ ENTRY; /* Do not cancel locks in case lru resize is disabled for this ns */ if (!ns_connect_lru_resize(ldlm_pl2ns(pl))) RETURN(0); /* Find out how many locks may be released according to shrink * policy. */ if (nr == 0) RETURN(ldlm_cancel_lru_estimate(ldlm_pl2ns(pl), 0, 0, LDLM_CANCEL_SHRINK)); /* Cancel @nr locks accoding to shrink policy */ RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), nr, LDLM_SYNC, LDLM_CANCEL_SHRINK));}struct ldlm_pool_ops ldlm_srv_pool_ops = { .po_recalc = ldlm_srv_pool_recalc, .po_shrink = ldlm_srv_pool_shrink, .po_setup = ldlm_srv_pool_setup};struct ldlm_pool_ops ldlm_cli_pool_ops = { .po_recalc = ldlm_cli_pool_recalc, .po_shrink = ldlm_cli_pool_shrink};int ldlm_pool_recalc(struct ldlm_pool *pl){ int count; if (pl->pl_ops->po_recalc != NULL) { count = pl->pl_ops->po_recalc(pl); lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT,
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?