📄 afr.c
字号:
/* Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> This file is part of GlusterFS. GlusterFS is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. GlusterFS is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.*//* * TODO: * 1) Check the FIXMEs * 2) There are no known mem leaks, check once again */#include <libgen.h>#include <unistd.h>#include <fnmatch.h>#include <sys/time.h>#include <stdlib.h>#ifndef _CONFIG_H#define _CONFIG_H#include "config.h"#endif#include "glusterfs.h"#include "afr.h"#include "dict.h"#include "xlator.h"#include "hashfn.h"#include "logging.h"#include "stack.h"#include "list.h"#include "call-stub.h"#include "defaults.h"#include "common-utils.h"#define BUF_SIZE 512#define AFR_DEBUG_FMT(xl, format, args...) if(((afr_private_t*)(xl)->private)->debug) gf_log ((xl)->name, GF_LOG_DEBUG, "AFRDEBUG:" format, ##args);#define AFR_DEBUG(xl) if(((afr_private_t*)xl->private)->debug) gf_log (xl->name, GF_LOG_DEBUG, "AFRDEBUG:");#define AFR_ERRNO_DUP(child_errno, afr_errno, child_count) do {\ child_errno = alloca(child_count);\ memcpy (child_errno, afr_errno, child_count);\} while(0);extern void afr_lookup_directory_selfheal (call_frame_t *);loc_t*afr_loc_dup(loc_t *loc){ loc_t *loctmp; GF_BUG_ON (!loc); loctmp = calloc(1, sizeof(loc_t)); loctmp->inode = loc->inode; loctmp->path = strdup (loc->path); return loctmp;}voidafr_loc_free(loc_t *loc){ GF_BUG_ON (!loc); freee (loc->path); freee(loc);}inline void afr_free_ashptr (afr_selfheal_t * ashptr, int32_t child_count, int32_t latest){ freee (ashptr);}int32_tafr_sync_ownership_permission_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct stat *stbuf){ afr_local_t *local = frame->local; call_frame_t *prev_frame = cookie; afr_private_t *pvt = this->private; xlator_t **children = pvt->children; int32_t child_count = pvt->child_count; int32_t callcnt, i, first = -1, latest = -1; struct stat *statptr = local->statptr; char *child_errno = NULL; inode_t *inoptr = local->loc->inode; dict_t *xattr; AFR_DEBUG (this); child_errno = data_to_ptr (dict_get(local->loc->inode->ctx, this->name)); for (i = 0; i < child_count; i++) if (prev_frame->this == children[i]) break; if (op_ret == 0) { GF_BUG_ON (!stbuf); statptr[i] = *stbuf; } else { GF_ERROR (this, "(path=%s child=%s) op_ret=%d op_errno=%d", local->loc->path, prev_frame->this->name, op_ret, op_errno); } LOCK (&frame->lock); callcnt = --local->call_count; UNLOCK (&frame->lock); if (callcnt == 0) { for (i = 0; i < child_count; i++) { if (child_errno[i] == 0) { if (first == -1) { first = i; latest = i; continue; } if (statptr[i].st_mtime > statptr[latest].st_mtime) latest = i; } } if (first == -1) { GF_WARNING (this, "first == -1"); first = latest = 0; } afr_loc_free(local->loc); afr_free_ashptr (local->ashptr, child_count, local->latest); if (local->ino) statptr[latest].st_ino = local->ino; else statptr[latest].st_ino = statptr[first].st_ino; xattr = local->latest_xattr; STACK_UNWIND (frame, local->op_ret, local->op_errno, inoptr, &statptr[latest], xattr); if (xattr) dict_unref (xattr); freee (statptr); } return 0;}/* * afr_sync_ownership_permission - sync ownership and permission attributes * * @frame: we are doing syncing in frame's context */int32_tafr_sync_ownership_permission (call_frame_t *frame){ char *child_errno = NULL; afr_local_t *local = frame->local; inode_t *inode = local->loc->inode; afr_private_t *pvt = frame->this->private; xlator_t **children = pvt->children; int32_t child_count = pvt->child_count; int32_t i, first = -1; int32_t latest = -1; /* to keep track of the the child node, which contains the most recent entry */ struct stat *statptr = local->statptr; dict_t *xattr; child_errno = data_to_ptr (dict_get(local->loc->inode->ctx, frame->this->name)); /* krishna claims child_errno can't be null, but we are paranoid */ GF_BUG_ON (!child_errno); /* we get the stat info with the latest ctime * ctime indicates the time when there was any modification to the * inode like permission, mode etc */ for (i = 0; i < child_count; i++) { if (child_errno[i] == 0) { if (latest == -1) { latest = i; continue; } if (statptr[i].st_ctime > statptr[latest].st_ctime) latest = i; } } AFR_DEBUG_FMT (frame->this, "latest %s uid %u gid %u %d", children[latest]->name, statptr[latest].st_uid, statptr[latest].st_gid, statptr[latest].st_mode); /* find out if there are any stat whose uid/gid/mode mismatch */ for (i = 0; i < child_count; i++) { if (child_errno[i] == 0) { if (statptr[latest].st_uid != statptr[i].st_uid || statptr[latest].st_gid != statptr[i].st_gid) { local->call_count++; } if (statptr[latest].st_mode != statptr[i].st_mode) { local->call_count++; } } } AFR_DEBUG_FMT (frame->this, "local->call_count %d", local->call_count); if (local->call_count) { local->stbuf = statptr[latest]; /* in case there was any uid/gid/mode mismatch, we rectify it as root */ for (i = 0; i < child_count; i++) { if (child_errno[i] == 0) { if (i == latest) continue; if (statptr[latest].st_uid != statptr[i].st_uid || statptr[latest].st_gid != statptr[i].st_gid) { GF_DEBUG (frame->this, "uid/gid mismatch, latest on %s, calling chown(%s, %u, %u) on %s", children[latest]->name, local->loc->path, statptr[latest].st_uid, statptr[latest].st_gid, children[i]->name); STACK_WIND (frame, afr_sync_ownership_permission_cbk, children[i], children[i]->fops->chown, local->loc, statptr[latest].st_uid, statptr[latest].st_gid); } if (statptr[latest].st_mode != statptr[i].st_mode) { GF_DEBUG (frame->this, "mode mismatch, latest on %s, calling chmod(%s, 0%o) on %s", children[latest]->name, local->loc->path, statptr[latest].st_mode, children[i]->name); STACK_WIND (frame, afr_sync_ownership_permission_cbk, children[i], children[i]->fops->chmod, local->loc, statptr[latest].st_mode); } } } return 0; } /* we reach here means no self-heal is needed */ for (i = 0; i < child_count; i++) { if (child_errno[i] == 0) { if (first == -1) { first = i; latest = i; continue; } if (statptr[i].st_mtime > statptr[latest].st_mtime) latest = i; } } if (first == -1) { GF_WARNING (frame->this, "first == -1"); first = latest = 0; } if (local->ino) statptr[latest].st_ino = local->ino; else statptr[latest].st_ino = statptr[first].st_ino; afr_loc_free(local->loc); afr_free_ashptr (local->ashptr, child_count, local->latest); xattr = local->latest_xattr; STACK_UNWIND (frame, local->op_ret, local->op_errno, inode, &statptr[latest], xattr); if (xattr) dict_unref (xattr); freee (statptr); return 0;}int32_tafr_lookup_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno){ afr_local_t *local = frame->local; call_frame_t *prev_frame = cookie; AFR_DEBUG_FMT (this, "(child=%s) op_ret=%d op_errno=%d", prev_frame->this->name, op_ret, op_errno); if (local->rmelem_status) { loc_t *loc = local->loc; afr_selfheal_t *ashptr = local->ashptr; struct stat *statptr = local->statptr; afr_private_t *pvt = this->private; afr_loc_free (loc); afr_free_ashptr (ashptr, pvt->child_count, local->latest); freee (statptr); if (local->latest_xattr) dict_unref (local->latest_xattr); STACK_UNWIND (frame, -1, EIO, local->loc->inode, NULL, NULL); return 0; } afr_sync_ownership_permission (frame); return 0;}int32_tafr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct stat *buf, dict_t *xattr);int32_tafr_lookup_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno){ afr_local_t *local = frame->local; afr_private_t *pvt = this->private; int32_t child_count = pvt->child_count; int32_t i; xlator_t **children = pvt->children; AFR_DEBUG_FMT (this, "op_ret=%d op_errno=%d", op_ret, op_errno); local->call_count = child_count; local->op_ret = -1; local->op_errno = ENOTCONN; for (i = 0; i < child_count; i++) { STACK_WIND (frame, afr_lookup_cbk, children[i], children[i]->fops->lookup, local->loc, 1); } return 0;}/* * afr_check_ctime_version * * @frame: call frame, this is the context in which we will try to complete the directory self-heal * */voidafr_check_ctime_version (call_frame_t *frame){ /* * if not a directory, call sync perm/ownership function * if it is a directory, compare the ctime/versions * if they are same, call sync perm/owenership function * if they differ, lock the path * in lock_cbk, get dirents from the latest and the outdated children * note down all the elements (files/dirs/links) that need to be deleted from the outdated children * call remove_elem on the elements that need to be removed. * in the cbk, update the ctime/version on the outdated children * in the cbk call sync perm/ownership function. */ /* we need to increment the 'version' count whenever there is change in contents * of a directory, which can happen during the fops mentioned on next line: * create(), unlink(), rmdir(), mkdir(), symlink(), link(), rename(), mknod() */ char *child_errno = NULL; int32_t latest = 0, differ = 0, first = 0, i; afr_local_t *local = frame->local; afr_private_t *pvt = frame->this->private; int32_t child_count = pvt->child_count; struct stat *statptr = local->statptr; afr_selfheal_t *ashptr = local->ashptr; xlator_t **children = pvt->children; char *state = pvt->state; AFR_DEBUG (frame->this); /* child_errno cant be NULL */ child_errno = data_to_ptr (dict_get(local->loc->inode->ctx, frame->this->name)); GF_BUG_ON (!child_errno); /* 'i' will be the index to the first child node which returned the fop with complete success */ for (i = 0; i < child_count; i++) if (child_errno[i] == 0) break; latest = first = i; /* this is valid else we wouldnt have got called */ if (S_ISDIR(statptr[i].st_mode) == 0) { /* in case this is not directory, we shouldn't call directory selfheal code */ afr_sync_ownership_permission (frame); return; } for (i = 0; i < child_count; i++) { if (child_errno[i] == 0) { if (ashptr[i].ctime != ashptr[latest].ctime || ashptr[i].version != ashptr[latest].version) { differ = 1; } if (ashptr[i].ctime > ashptr[latest].ctime) { latest = i; } else if (ashptr[i].ctime == ashptr[latest].ctime && ashptr[i].version > ashptr[latest].version) { latest = i; } } } if (differ == 0) { if (local->lock_node) { char *lock_path = NULL; asprintf (&lock_path, "/%s%s", local->lock_node->name, local->loc->path); STACK_WIND (frame, afr_lookup_unlock_cbk, local->lock_node, local->lock_node->mops->unlock, lock_path); freee (lock_path); } else afr_sync_ownership_permission (frame); return; } for (i = 0; i < child_count; i++) { if (pvt->state[i]) break; } if (i == child_count) { if (local->lock_node) { char *lock_path = NULL; asprintf (&lock_path, "/%s%s", local->lock_node->name, local->loc->path); STACK_WIND (frame, afr_lookup_unlock_cbk, local->lock_node, local->lock_node->mops->unlock, lock_path); freee (lock_path); } else afr_sync_ownership_permission (frame); return; } if (local->lock_node) { local->fd = fd_create (local->loc->inode); for (i = 0; i < child_count; i++) { if (child_errno[i] != 0) continue; if (i == latest) { continue; } if (ashptr[latest].ctime > ashptr[i].ctime) { ashptr[i].repair = 1; continue; } if (ashptr[latest].ctime == ashptr[i].ctime && ashptr[latest].version > ashptr[i].version) { ashptr[i].repair = 1; } } local->latest = latest; afr_lookup_directory_selfheal (frame); } else { char *lock_path = NULL; for (i = 0; i < child_count; i++) { if (state[i]) break; } if (i == child_count) { GF_ERROR (frame->this, "no child up for locking, returning EIO"); afr_loc_free(local->loc); afr_free_ashptr (local->ashptr, child_count, local->latest); freee (statptr); STACK_UNWIND (frame, -1, EIO, NULL, NULL, NULL); return; } local->lock_node = children[i]; asprintf (&lock_path, "/%s%s", local->lock_node->name, local->loc->path); AFR_DEBUG_FMT (frame->this, "locking (%s on %s)", lock_path, local->lock_node->name); /* lets lock the first alive node */ STACK_WIND (frame, afr_lookup_lock_cbk, children[i], children[i]->mops->lock, lock_path); freee (lock_path); } return;}int32_tafr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -