📄 mdc_locks.c
字号:
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (C) 2001-2003 Cluster File Systems, Inc. * * This file is part of the Lustre file system, http://www.lustre.org * Lustre is a trademark of Cluster File Systems, Inc. * * You may have signed or agreed to another license before downloading * this software. If so, you are bound by the terms and conditions * of that agreement, and the following does not apply to you. See the * LICENSE file included with this distribution for more information. * * If you did not agree to a different license, then this copy of Lustre * is open source software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * In either case, Lustre is distributed in the hope that it will be * useful, but WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * license text for more details. */#ifndef EXPORT_SYMTAB# define EXPORT_SYMTAB#endif#define DEBUG_SUBSYSTEM S_MDC#ifdef __KERNEL__# include <linux/module.h># include <linux/pagemap.h># include <linux/miscdevice.h># include <linux/init.h>#else# include <liblustre.h>#endif#include <obd_class.h>#include <lustre_dlm.h>#include <lprocfs_status.h>#include "mdc_internal.h"int it_disposition(struct lookup_intent *it, int flag){ return it->d.lustre.it_disposition & flag;}EXPORT_SYMBOL(it_disposition);void it_set_disposition(struct lookup_intent *it, int flag){ it->d.lustre.it_disposition |= flag;}EXPORT_SYMBOL(it_set_disposition);void it_clear_disposition(struct lookup_intent *it, int flag){ it->d.lustre.it_disposition &= ~flag;}EXPORT_SYMBOL(it_clear_disposition);int it_open_error(int phase, struct lookup_intent *it){ if (it_disposition(it, DISP_OPEN_OPEN)) { if (phase >= DISP_OPEN_OPEN) return it->d.lustre.it_status; else return 0; } if (it_disposition(it, DISP_OPEN_CREATE)) { if (phase >= DISP_OPEN_CREATE) return it->d.lustre.it_status; else return 0; } if (it_disposition(it, DISP_LOOKUP_EXECD)) { if (phase >= DISP_LOOKUP_EXECD) return it->d.lustre.it_status; else return 0; } if (it_disposition(it, DISP_IT_EXECD)) { if (phase >= DISP_IT_EXECD) return it->d.lustre.it_status; else return 0; } CERROR("it disp: %X, status: %d\n", it->d.lustre.it_disposition, it->d.lustre.it_status); LBUG(); return 0;}EXPORT_SYMBOL(it_open_error);/* this must be called on a lockh that is known to have a referenced lock */void mdc_set_lock_data(__u64 *l, void *data){ struct ldlm_lock *lock; struct lustre_handle *lockh = (struct lustre_handle *)l; ENTRY; if (!*l) { EXIT; return; } lock = ldlm_handle2lock(lockh); LASSERT(lock != NULL); lock_res_and_lock(lock);#ifdef __KERNEL__ if (lock->l_ast_data && lock->l_ast_data != data) { struct inode *new_inode = data; struct inode *old_inode = lock->l_ast_data; LASSERTF(old_inode->i_state & I_FREEING, "Found existing inode %p/%lu/%u state %lu in lock: " "setting data to %p/%lu/%u\n", old_inode, old_inode->i_ino, old_inode->i_generation, old_inode->i_state, new_inode, new_inode->i_ino, new_inode->i_generation); }#endif lock->l_ast_data = data; unlock_res_and_lock(lock); LDLM_LOCK_PUT(lock); EXIT;}EXPORT_SYMBOL(mdc_set_lock_data);int mdc_change_cbdata(struct obd_export *exp, struct ll_fid *fid, ldlm_iterator_t it, void *data){ struct ldlm_res_id res_id = { .name = {0} }; ENTRY; res_id.name[0] = fid->id; res_id.name[1] = fid->generation; ldlm_resource_iterate(class_exp2obd(exp)->obd_namespace, &res_id, it, data); EXIT; return 0;}static inline void mdc_clear_replay_flag(struct ptlrpc_request *req, int rc){ /* Don't hold error requests for replay. */ if (req->rq_replay) { spin_lock(&req->rq_lock); req->rq_replay = 0; spin_unlock(&req->rq_lock); } if (rc && req->rq_transno != 0) { DEBUG_REQ(D_ERROR, req, "transno returned on error rc %d", rc); LBUG(); }}static int round_up(int val){ int ret = 1; while (val) { val >>= 1; ret <<= 1; } return ret;}/* Save a large LOV EA into the request buffer so that it is available * for replay. We don't do this in the initial request because the * original request doesn't need this buffer (at most it sends just the * lov_mds_md) and it is a waste of RAM/bandwidth to send the empty * buffer and may also be difficult to allocate and save a very large * request buffer for each open. (bug 5707) * * OOM here may cause recovery failure if lmm is needed (only for the * original open if the MDS crashed just when this client also OOM'd) * but this is incredibly unlikely, and questionable whether the client * could do MDS recovery under OOM anyways... */static void mdc_realloc_openmsg(struct ptlrpc_request *req, struct mds_body *body){ int old_len, new_size, old_size; struct lustre_msg *old_msg = req->rq_reqmsg; struct lustre_msg *new_msg; old_len = lustre_msg_buflen(old_msg, DLM_INTENT_REC_OFF + 2); old_size = lustre_packed_msg_size(old_msg); lustre_msg_set_buflen(old_msg, DLM_INTENT_REC_OFF + 2, body->eadatasize); new_size = lustre_packed_msg_size(old_msg); OBD_ALLOC(new_msg, new_size); if (new_msg != NULL) { DEBUG_REQ(D_INFO, req, "replace reqmsg for larger EA %u\n", body->eadatasize); memcpy(new_msg, old_msg, old_size); spin_lock(&req->rq_lock); req->rq_reqmsg = new_msg; req->rq_reqlen = new_size; spin_unlock(&req->rq_lock); OBD_FREE(old_msg, old_size); } else { lustre_msg_set_buflen(old_msg, DLM_INTENT_REC_OFF + 2, old_len); body->valid &= ~OBD_MD_FLEASIZE; body->eadatasize = 0; }}static struct ptlrpc_request *mdc_intent_open_pack(struct obd_export *exp, struct lookup_intent *it, struct mdc_op_data *data, void *lmm, int lmmsize){ struct ptlrpc_request *req; struct ldlm_intent *lit; struct obd_device *obddev = class_exp2obd(exp); int size[7] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body), [DLM_LOCKREQ_OFF] = sizeof(struct ldlm_request), [DLM_INTENT_IT_OFF] = sizeof(*lit), [DLM_INTENT_REC_OFF] = sizeof(struct mds_rec_create), [DLM_INTENT_REC_OFF+1]= data->namelen + 1, /* As an optimization, we allocate an RPC request buffer * for at least a default-sized LOV EA even if we aren't * sending one. We grow the whole request to the next * power-of-two size since we get that much from a slab * allocation anyways. This avoids an allocation below * in the common case where we need to save a * default-sized LOV EA for open replay. */ [DLM_INTENT_REC_OFF+2]= max(lmmsize, obddev->u.cli.cl_default_mds_easize) }; int repsize[5] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body), [DLM_LOCKREPLY_OFF] = sizeof(struct ldlm_reply), [DLM_REPLY_REC_OFF] = sizeof(struct mds_body), [DLM_REPLY_REC_OFF+1] = obddev->u.cli. cl_max_mds_easize, [DLM_REPLY_REC_OFF+2] = LUSTRE_POSIX_ACL_MAX_SIZE }; CFS_LIST_HEAD(cancels); int do_join = (it->it_flags & O_JOIN_FILE) && data->data; int count = 0; int mode; int rc; it->it_create_mode |= S_IFREG; rc = lustre_msg_size(class_exp2cliimp(exp)->imp_msg_magic, 6, size); if (rc & (rc - 1)) size[DLM_INTENT_REC_OFF + 2] = min(size[DLM_INTENT_REC_OFF + 2] + round_up(rc) - rc, obddev->u.cli.cl_max_mds_easize); /* If inode is known, cancel conflicting OPEN locks. */ if (data->fid2.id) { if (it->it_flags & (FMODE_WRITE|MDS_OPEN_TRUNC)) mode = LCK_CW;#ifdef FMODE_EXEC else if (it->it_flags & FMODE_EXEC) mode = LCK_PR;#endif else mode = LCK_CR; count = mdc_resource_get_unused(exp, &data->fid2, &cancels, mode, MDS_INODELOCK_OPEN); } /* If CREATE or JOIN_FILE, cancel parent's UPDATE lock. */ if (it->it_op & IT_CREAT || do_join) mode = LCK_EX; else mode = LCK_CR; count += mdc_resource_get_unused(exp, &data->fid1, &cancels, mode, MDS_INODELOCK_UPDATE); if (do_join) { __u64 head_size = (*(__u64 *)data->data); /* join is like an unlink of the tail */ size[DLM_INTENT_REC_OFF + 3] = sizeof(struct mds_rec_join); req = ldlm_prep_enqueue_req(exp, 7, size, &cancels, count); if (req) mdc_join_pack(req, DLM_INTENT_REC_OFF + 3, data, head_size); } else { req = ldlm_prep_enqueue_req(exp, 6, size, &cancels, count); it->it_flags &= ~O_JOIN_FILE; } if (req) { spin_lock(&req->rq_lock); req->rq_replay = 1; spin_unlock(&req->rq_lock); /* pack the intent */ lit = lustre_msg_buf(req->rq_reqmsg, DLM_INTENT_IT_OFF, sizeof(*lit)); lit->opc = (__u64)it->it_op; /* pack the intended request */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -