ldlm_pool.c

来自「lustre 1.6.5 source code」· C语言 代码 · 共 1,146 行 · 第 1/3 页

C
1,146
字号
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * *  Copyright (c) 2007 Cluster File Systems, Inc. *   Author: Yury Umanets <umka@clusterfs.com> * *   This file is part of the Lustre file system, http://www.lustre.org *   Lustre is a trademark of Cluster File Systems, Inc. * *   You may have signed or agreed to another license before downloading *   this software.  If so, you are bound by the terms and conditions *   of that agreement, and the following does not apply to you.  See the *   LICENSE file included with this distribution for more information. * *   If you did not agree to a different license, then this copy of Lustre *   is open source software; you can redistribute it and/or modify it *   under the terms of version 2 of the GNU General Public License as *   published by the Free Software Foundation. * *   In either case, Lustre is distributed in the hope that it will be *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *   license text for more details. *//* Idea of this code is rather simple. Each second, for each server namespace * we have SLV - server lock volume which is calculated on current number of * granted locks, grant speed for past period, etc - that is, locking load. * This SLV number may be thought as a flow definition for simplicity. It is * sent to clients with each occasion to let them know what is current load * situation on the server. By default, at the beginning, SLV on server is * set max value which is calculated as the following: allow to one client * have all locks of limit ->pl_limit for 10h. * * Next, on clients, number of cached locks is not limited artificially in any * way as it was before. Instead, client calculates CLV, that is, client lock * volume for each lock and compares it with last SLV from the server. 
CLV is
 * calculated as (number of locks in LRU) * (lock live time in seconds). If
 * CLV > SLV, the lock is canceled.
 *
 * Client has LVF, that is, lock volume factor, which regulates how sensitive
 * the client should be to the last SLV from the server. The higher LVF is, the
 * more locks will be canceled on the client. Default value for it is 1. Setting
 * LVF to 2 means that the client will cancel locks 2 times faster.
 *
 * Locks on a client will be canceled more intensively in these cases:
 * (1) if SLV is smaller, that is, load is higher on the server;
 * (2) client has a lot of locks (the more locks are held by the client, the
 *     higher the chance that some of them should be canceled);
 * (3) client has old locks (taken some time ago);
 *
 * Thus, according to the flow paradigm that we use for better understanding
 * SLV, CLV is the volume of a particle in the flow described by SLV. According
 * to this, if the flow is getting thinner, more and more particles end up
 * outside of it and, as particles are locks, they should be canceled.
 *
 * General idea of this belongs to Vitaly Fertman (vitaly@clusterfs.com). Andreas
 * Dilger (adilger@clusterfs.com) proposed a few nice ideas like using LVF and
 * many cleanups. Flow definition to allow easier understanding of the logic
 * belongs to Nikita Danilov (nikita@clusterfs.com) as well as many cleanups and
 * fixes. And design and implementation are done by Yury Umanets
 * (umka@clusterfs.com).
 *
 * Glossary for terms used:
 *
 * pl_limit - Number of allowed locks in pool.
Applies to server and client * side (tunable); * * pl_granted - Number of granted locks (calculated); * pl_grant_rate - Number of granted locks for last T (calculated); * pl_cancel_rate - Number of canceled locks for last T (calculated); * pl_grant_speed - Grant speed (GR - CR) for last T (calculated); * pl_grant_plan - Planned number of granted locks for next T (calculated); * * pl_grant_step - Grant plan step, that is how ->pl_grant_plan * will change in next T (tunable); * * pl_server_lock_volume - Current server lock volume (calculated); * * As it may be seen from list above, we have few possible tunables which may * affect behavior much. They all may be modified via proc. However, they also * give a possibility for constructing few pre-defined behavior policies. If * none of predefines is suitable for a working pattern being used, new one may * be "constructed" via proc tunables. */#define DEBUG_SUBSYSTEM S_LDLM#ifdef __KERNEL__# include <lustre_dlm.h>#else# include <liblustre.h># include <libcfs/kp30.h>#endif#include <obd_class.h>#include <obd_support.h>#include "ldlm_internal.h"#ifdef HAVE_LRU_RESIZE_SUPPORT/* 50 ldlm locks for 1MB of RAM. */#define LDLM_POOL_HOST_L ((num_physpages >> (20 - PAGE_SHIFT)) * 50)/* Default step in % for grant plan. */#define LDLM_POOL_GSP (10)/* LDLM_POOL_GSP% of all locks is default GP. */#define LDLM_POOL_GP(L)   (((L) * LDLM_POOL_GSP) / 100)/* Max age for locks on clients. */#define LDLM_POOL_MAX_AGE (36000)#ifdef __KERNEL__extern cfs_proc_dir_entry_t *ldlm_ns_proc_dir;#endif#define avg(src, add) \        ((src) = ((src) + (add)) / 2)static inline __u64 dru(__u64 val, __u32 div){        __u64 ret = val + (div - 1);        do_div(ret, div);        return ret;}static inline __u64 ldlm_pool_slv_max(__u32 L){        /* Allow to have all locks for 1 client for 10 hrs.         * Formula is the following: limit * 10h / 1 client. 
*/        __u64 lim = L *  LDLM_POOL_MAX_AGE / 1;        return lim;}static inline __u64 ldlm_pool_slv_min(__u32 L){        return 1;}enum {        LDLM_POOL_FIRST_STAT = 0,        LDLM_POOL_GRANTED_STAT = LDLM_POOL_FIRST_STAT,        LDLM_POOL_GRANT_STAT,        LDLM_POOL_CANCEL_STAT,        LDLM_POOL_GRANT_RATE_STAT,        LDLM_POOL_CANCEL_RATE_STAT,        LDLM_POOL_GRANT_PLAN_STAT,        LDLM_POOL_SLV_STAT,        LDLM_POOL_SHRINK_REQTD_STAT,        LDLM_POOL_SHRINK_FREED_STAT,        LDLM_POOL_RECALC_STAT,        LDLM_POOL_TIMING_STAT,        LDLM_POOL_LAST_STAT};static inline struct ldlm_namespace *ldlm_pl2ns(struct ldlm_pool *pl){        return container_of(pl, struct ldlm_namespace, ns_pool);}/* Should be called under ->pl_lock taken */static inline void ldlm_pool_recalc_grant_plan(struct ldlm_pool *pl){        int granted, grant_step, limit;                limit = ldlm_pool_get_limit(pl);        granted = atomic_read(&pl->pl_granted);        grant_step = ((limit - granted) * pl->pl_grant_step) / 100;        pl->pl_grant_plan = granted + grant_step;}/* Should be called under ->pl_lock taken */static inline void ldlm_pool_recalc_slv(struct ldlm_pool *pl){        int grant_usage, granted, grant_plan;        __u64 slv, slv_factor;        __u32 limit;        slv = ldlm_pool_get_slv(pl);        grant_plan = pl->pl_grant_plan;        limit = ldlm_pool_get_limit(pl);        granted = atomic_read(&pl->pl_granted);        grant_usage = limit - (granted - grant_plan);        if (grant_usage <= 0)                grant_usage = 1;        /* Find out SLV change factor which is the ratio of grant usage          * from limit. SLV changes as fast as the ratio of grant plan          * consumtion. The more locks from grant plan are not consumed          * by clients in last interval (idle time), the faster grows          * SLV. And the opposite, the more grant plan is over-consumed         * (load time) the faster drops SLV. 
*/        slv_factor = (grant_usage * 100) / limit;        if (2 * abs(granted - limit) > limit) {                slv_factor *= slv_factor;                slv_factor = dru(slv_factor, 100);        }        slv = slv * slv_factor;        slv = dru(slv, 100);        if (slv > ldlm_pool_slv_max(limit)) {                slv = ldlm_pool_slv_max(limit);        } else if (slv < ldlm_pool_slv_min(limit)) {                slv = ldlm_pool_slv_min(limit);        }        ldlm_pool_set_slv(pl, slv);}static inline void ldlm_pool_recalc_stats(struct ldlm_pool *pl){        __u64 slv = ldlm_pool_get_slv(pl);        int grant_plan = pl->pl_grant_plan;        int granted = atomic_read(&pl->pl_granted);        int grant_rate = atomic_read(&pl->pl_grant_rate);        int cancel_rate = atomic_read(&pl->pl_cancel_rate);        lprocfs_counter_add(pl->pl_stats, LDLM_POOL_SLV_STAT,                             slv);        lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANTED_STAT,                            granted);        lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANT_RATE_STAT,                            grant_rate);        lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANT_PLAN_STAT,                            grant_plan);        lprocfs_counter_add(pl->pl_stats, LDLM_POOL_CANCEL_RATE_STAT,                            cancel_rate);}static int ldlm_srv_pool_recalc(struct ldlm_pool *pl){        time_t recalc_interval_sec;        ENTRY;        spin_lock(&pl->pl_lock);        recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time;        if (recalc_interval_sec > 0) {                /* Update statistics */                ldlm_pool_recalc_stats(pl);                /* Recalc SLV after last period. This should be done                 * _before_ recalculating new grant plan. */                ldlm_pool_recalc_slv(pl);                /* Update grant_plan for new period. 
*/                ldlm_pool_recalc_grant_plan(pl);                /* Zero out all rates and speed for the last period. */                atomic_set(&pl->pl_grant_rate, 0);                atomic_set(&pl->pl_cancel_rate, 0);                atomic_set(&pl->pl_grant_speed, 0);                pl->pl_recalc_time = cfs_time_current_sec();                lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT,                                     recalc_interval_sec);        }        spin_unlock(&pl->pl_lock);        RETURN(0);}/* Our goal here is to decrease SLV the way to make a client hold * @nr locks smaller in next 10h. */static int ldlm_srv_pool_shrink(struct ldlm_pool *pl,                                int nr, unsigned int gfp_mask){        __u32 limit;        ENTRY;        /* VM is asking how many entries may be potentially freed. */        if (nr == 0)                RETURN(atomic_read(&pl->pl_granted));        /* Client already canceled locks but server is already in shrinker         * and can't cancel anything. Let's catch this race. */        if (atomic_read(&pl->pl_granted) == 0)                RETURN(0);        spin_lock(&pl->pl_lock);        /* We want shrinker to possibly cause cancelation of @nr locks from         * clients or grant approximately @nr locks smaller next intervals.         *         * This is why we decresed SLV by @nr. This effect will only be as         * long as one re-calc interval (1s these days) and this should be         * enough to pass this decreased SLV to all clients. On next recalc         * interval pool will either increase SLV if locks load is not high         * or will keep on same level or even decrease again, thus, shrinker         * decreased SLV will affect next recalc intervals and this way will         * make locking load lower. 
*/        if (nr < ldlm_pool_get_slv(pl)) {                ldlm_pool_set_slv(pl, ldlm_pool_get_slv(pl) - nr);        } else {                limit = ldlm_pool_get_limit(pl);                ldlm_pool_set_slv(pl, ldlm_pool_slv_min(limit));        }        spin_unlock(&pl->pl_lock);        /* We did not really free any memory here so far, it only will be         * freed later may be, so that we return 0 to not confuse VM. */        RETURN(0);}static int ldlm_srv_pool_setup(struct ldlm_pool *pl, int limit){        ENTRY;        ldlm_pool_set_limit(pl, limit);        RETURN(0);}static int ldlm_cli_pool_recalc(struct ldlm_pool *pl){        time_t recalc_interval_sec;        ENTRY;        spin_lock(&pl->pl_lock);        recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time;        if (recalc_interval_sec > 0) {                /* Update statistics only every T */                ldlm_pool_recalc_stats(pl);                /* Zero out grant/cancel rates and speed for last period. */                atomic_set(&pl->pl_grant_rate, 0);                atomic_set(&pl->pl_cancel_rate, 0);                atomic_set(&pl->pl_grant_speed, 0);                pl->pl_recalc_time = cfs_time_current_sec();                lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT,                                     recalc_interval_sec);        }        spin_unlock(&pl->pl_lock);        /* Do not cancel locks in case lru resize is disabled for this ns */        if (!ns_connect_lru_resize(ldlm_pl2ns(pl)))                RETURN(0);        /* In the time of canceling locks on client we do not need to maintain         * sharp timing, we only want to cancel locks asap according to new SLV.         * This may be called when SLV has changed much, this is why we do not         * take into account pl->pl_recalc_time here. 
*/        RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LDLM_ASYNC,                                LDLM_CANCEL_LRUR));}static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,                                int nr, unsigned int gfp_mask){        ENTRY;                /* Do not cancel locks in case lru resize is disabled for this ns */        if (!ns_connect_lru_resize(ldlm_pl2ns(pl)))                RETURN(0);        /* Find out how many locks may be released according to shrink          * policy. */        if (nr == 0)                RETURN(ldlm_cancel_lru_estimate(ldlm_pl2ns(pl), 0, 0,                                                 LDLM_CANCEL_SHRINK));        /* Cancel @nr locks accoding to shrink policy */        RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), nr, LDLM_SYNC,                                LDLM_CANCEL_SHRINK));}struct ldlm_pool_ops ldlm_srv_pool_ops = {        .po_recalc = ldlm_srv_pool_recalc,        .po_shrink = ldlm_srv_pool_shrink,        .po_setup  = ldlm_srv_pool_setup};struct ldlm_pool_ops ldlm_cli_pool_ops = {        .po_recalc = ldlm_cli_pool_recalc,        .po_shrink = ldlm_cli_pool_shrink};int ldlm_pool_recalc(struct ldlm_pool *pl){        int count;        if (pl->pl_ops->po_recalc != NULL) {                count = pl->pl_ops->po_recalc(pl);                lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT, 

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?