📄 stripe.c
字号:
/* Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> This file is part of GlusterFS. GlusterFS is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. GlusterFS is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.*//** * xlators/cluster/stripe: * Stripe translator, stripes the data accross its child nodes, * as per the options given in the spec file. The striping works * fairly simple. It writes files at different offset as per * calculation. So, 'ls -l' output at the real posix level will * show file size bigger than the actual size. But when one does * 'df' or 'du <file>', real size of the file on the server is shown. * * WARNING: * Stripe translator can't regenerate data if a child node gets disconnected. * So, no 'self-heal' for stripe. Hence the advice, use stripe only when its * very much necessary, or else, use it in combination with AFR, to have a * backup copy. */#ifndef _CONFIG_H#define _CONFIG_H#include "config.h"#endif#include "xlator.h"#include "logging.h"#include "defaults.h"#include <fnmatch.h>#define STRIPE_DEFAULT_BLOCK_SIZE 1048576#define STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR(_loc) do { \ if (!(_loc && _loc->inode && _loc->inode->ctx && \ dict_get (_loc->inode->ctx, this->name))) { \ TRAP_ON (!(_loc && _loc->inode && _loc->inode->ctx && \ dict_get (_loc->inode->ctx, this->name))); \ STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL); \ return 0; \ } \} while(0)struct stripe_local;/** * struct stripe_options : This keeps the pattern and the block-size * information, which is used for striping on a file. */struct stripe_options { struct stripe_options *next; char path_pattern[256]; size_t block_size;};/** * Private structure for stripe translator */struct stripe_private { struct stripe_options *pattern; xlator_t **xl_array; gf_lock_t lock; int8_t nodes_down; int8_t first_child_down; int8_t child_count; int8_t xattr_check[256]; /* Check for xattr support in underlying FS */};/** * Used to keep info about the replies received from fops->readv calls */struct readv_replies { struct iovec *vector; int32_t count; //count of vector int32_t op_ret; //op_ret of readv int32_t op_errno; struct stat stbuf; /* 'stbuf' is also a part of reply */};/** * Local structure to be passed with all the frames in case of STACK_WIND */struct stripe_local { struct stripe_local *next; call_frame_t *orig_frame; // /* Used by _cbk functions */ int8_t revalidate; int8_t failed; int8_t unwind; int8_t striped; int32_t node_index; int32_t call_count; int32_t wind_count; // used instead of child_cound in case of read and write */ int32_t op_ret; int32_t op_errno; int32_t count; int32_t flags; char *path; struct stat stbuf; struct readv_replies *replies; struct statvfs statvfs_buf; dir_entry_t *entry; struct xlator_stats stats; inode_t *inode; /* For File I/O fops */ dict_t *ctx; /* General usage */ off_t offset; off_t stripe_size; int8_t *list; struct flock lock; fd_t *fd; void *value;};typedef struct stripe_local stripe_local_t;typedef struct stripe_private stripe_private_t;/** * stripe_get_matching_bs - Get the matching block size for the given path. */int32_t stripe_get_matching_bs (const char *path, struct stripe_options *opts) { struct stripe_options *trav = opts; char *pathname = strdup (path); size_t block_size = STRIPE_DEFAULT_BLOCK_SIZE; /* 1MB default */ while (trav) { if (fnmatch (trav->path_pattern, pathname, FNM_NOESCAPE) == 0) { block_size = trav->block_size; break; } trav = trav->next; } free (pathname); return block_size;}/* * stripe_common_cbk - */int32_tstripe_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno){ STACK_UNWIND (frame, op_ret, op_errno); return 0;}/** * stripe_stack_unwind_cbk - This function is used for all the _cbk without * any extra arguments (other than the minimum given) * This is called from functions like forget,fsync,unlink,rmdir,close,closedir etc. * */int32_t stripe_stack_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno){ int32_t callcnt = 0; stripe_local_t *local = frame->local; LOCK (&frame->lock); { callcnt = --local->call_count; if (op_ret == -1) { gf_log (this->name, GF_LOG_WARNING, "%s returned errno %d", ((call_frame_t *)cookie)->this->name, op_errno); if (op_errno == ENOTCONN) { local->failed = 1; } local->op_errno = op_errno; } if (op_ret >= 0) local->op_ret = op_ret; } UNLOCK (&frame->lock); if (!callcnt) { if (local->failed) { local->op_ret = -1; } STACK_UNWIND (frame, local->op_ret, local->op_errno); } return 0;}int32_t stripe_common_buf_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct stat *buf){ STACK_UNWIND (frame, op_ret, op_errno, buf); return 0;}/** * stripe_stack_unwind_buf_cbk - This function is used for all the _cbk with * 'struct stat *buf' as extra argument (other than minimum) * This is called from functions like, chmod,fchmod,chown,fchown,truncate,ftruncate, * utimens etc. * * @cookie - this argument should be always 'xlator_t *' of child node */int32_t stripe_stack_unwind_buf_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct stat *buf){ int32_t callcnt = 0; stripe_local_t *local = frame->local; LOCK (&frame->lock); { callcnt = --local->call_count; if (op_ret == -1) { gf_log (this->name, GF_LOG_WARNING, "%s returned errno %d", ((call_frame_t *)cookie)->this->name, op_errno); if (op_errno == ENOTCONN) { local->failed = 1; } local->op_errno = op_errno; } if (op_ret == 0) { local->op_ret = 0; if (local->stbuf.st_blksize == 0) local->stbuf = *buf; if (FIRST_CHILD(this) == ((call_frame_t *)cookie)->this) { /* Always, pass the inode number of first child to the above layer */ local->stbuf.st_ino = buf->st_ino; } if (local->stbuf.st_size < buf->st_size) local->stbuf.st_size = buf->st_size; local->stbuf.st_blocks += buf->st_blocks; if (local->stbuf.st_blksize != buf->st_blksize) { /* TODO: add to blocks in terms of original block size */ } } } UNLOCK (&frame->lock); if (!callcnt) { if (local->failed) { local->op_ret = -1; } STACK_UNWIND (frame, local->op_ret, local->op_errno, &local->stbuf); } return 0;}int32_t stripe_common_inode_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct stat *buf){ dict_set (inode->ctx, this->name, data_from_int8 (1)); // not stripped STACK_UNWIND (frame, op_ret, op_errno, inode, buf); return 0;}/** * stripe_stack_unwind_inode_cbk - This is called by the function like, * link (), symlink (), mkdir (), mknod () * This creates a inode for new inode. It keeps a list of all * the inodes received from the child nodes. It is used while * forwarding any fops to child nodes. * */int32_t stripe_stack_unwind_inode_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct stat *buf){ int32_t callcnt = 0; stripe_local_t *local = frame->local; LOCK (&frame->lock); { callcnt = --local->call_count; if (op_ret == -1) { gf_log (this->name, GF_LOG_WARNING, "%s returned errno %d", ((call_frame_t *)cookie)->this->name, op_errno); if (op_errno == ENOTCONN) { local->failed = 1; } local->op_errno = op_errno; } if (op_ret >= 0) { local->op_ret = 0; if (local->stbuf.st_blksize == 0) { local->inode = inode; local->stbuf = *buf; } if (FIRST_CHILD(this) == ((call_frame_t *)cookie)->this) { local->stbuf.st_ino = buf->st_ino; /* Increment striped's value, as if we set it to some value, it may * overwrite earlier value */ local->striped++; } else { local->striped = 2; } if (local->stbuf.st_size < buf->st_size) local->stbuf.st_size = buf->st_size; local->stbuf.st_blocks += buf->st_blocks; if (local->stbuf.st_blksize != buf->st_blksize) { /* TODO: add to blocks in terms of original block size */ } } } UNLOCK (&frame->lock); if (!callcnt) { if (local->failed) { local->op_ret = -1; } if (local->op_ret == 0) { if (!local->revalidate) { if (local->striped == 1 && !S_ISDIR(local->stbuf.st_mode)) { dict_set (local->inode->ctx, this->name, data_from_int8 (1)); // not stripped } else { dict_set (local->inode->ctx, this->name, data_from_int8 (2)); // stripped } } } STACK_UNWIND (frame, local->op_ret, local->op_errno, local->inode, &local->stbuf); } return 0;}int32_t stripe_stack_unwind_inode_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct stat *buf, dict_t *dict){ int32_t callcnt = 0; stripe_local_t *local = frame->local; LOCK (&frame->lock); { callcnt = --local->call_count; if (op_ret == -1) { if (op_errno != ENOENT) gf_log (this->name, GF_LOG_WARNING, "%s returned errno %d", ((call_frame_t *)cookie)->this->name, op_errno); if (op_errno == ENOTCONN) { local->failed = 1; } local->op_errno = op_errno; } if (op_ret >= 0) { local->op_ret = 0; if (local->stbuf.st_blksize == 0) { local->inode = inode; local->stbuf = *buf; } if (FIRST_CHILD(this) == ((call_frame_t *)cookie)->this) { local->stbuf.st_ino = buf->st_ino; /* Increment striped's value, as if we set it to some value, it may * overwrite earlier value */ local->striped++; } else { local->striped = 2; } if (local->stbuf.st_size < buf->st_size) local->stbuf.st_size = buf->st_size; local->stbuf.st_blocks += buf->st_blocks; if (local->stbuf.st_blksize != buf->st_blksize) { /* TODO: add to blocks in terms of original block size */ } } } UNLOCK (&frame->lock); if (!callcnt) { if (local->failed) { local->op_ret = -1; } if (local->op_ret == 0) { if (!local->revalidate) { if (local->striped == 1 && !S_ISDIR(local->stbuf.st_mode)) { dict_set (local->inode->ctx, this->name, data_from_int8 (1)); // not stripped } else { dict_set (local->inode->ctx, this->name, data_from_int8 (2)); // stripped } } } STACK_UNWIND (frame, local->op_ret, local->op_errno, local->inode, &local->stbuf, dict); } return 0;}/** * stripe_lookup - */int32_t stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t need_xattr){ stripe_local_t *local = NULL; xlator_list_t *trav = NULL; stripe_private_t *priv = this->private; int32_t striped = 0; if (!(loc && loc->inode && loc->inode->ctx)) { gf_log (this->name, GF_LOG_ERROR, "wrong argument, returning EINVAL"); STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL); return 0; } /* Initialization */ local = calloc (1, sizeof (stripe_local_t)); local->op_ret = -1; frame->local = local; if (dict_get (loc->inode->ctx, this->name)) striped = data_to_int8 (dict_get (loc->inode->ctx, this->name)); if (!striped) { /* Everytime in stripe lookup, all child nodes should be looked up */ local->call_count = priv->child_count; trav = this->children; while (trav) { STACK_WIND (frame, stripe_stack_unwind_inode_lookup_cbk, trav->xlator, trav->xlator->fops->lookup, loc, need_xattr); trav = trav->next; } } else { local->revalidate = 1; local->inode = loc->inode; if (striped == 1) local->call_count = 1; else local->call_count = ((stripe_private_t *)this->private)->child_count; trav = this->children; while (trav) { STACK_WIND (frame, stripe_stack_unwind_inode_lookup_cbk, trav->xlator, trav->xlator->fops->lookup, loc, need_xattr); if (striped == 1) break; trav = trav->next; } } return 0;}/** * stripe_forget - */int32_t stripe_forget (call_frame_t *frame, xlator_t *this, inode_t *inode){ /* There is nothing to be done */ return 0;}/** * stripe_stat - */int32_tstripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc){ stripe_private_t *priv = this->private; xlator_list_t *trav = this->children; stripe_local_t *local = NULL; int8_t striped = 0; STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); striped = data_to_int8 (dict_get (loc->inode->ctx, this->name)); if (striped == 1) { STACK_WIND (frame, stripe_common_buf_cbk, trav->xlator, trav->xlator->fops->stat, loc); } else { /* Initialization */ local = calloc (1, sizeof (stripe_local_t)); local->op_ret = -1; frame->local = local; local->inode = loc->inode; local->call_count = priv->child_count; trav = this->children; while (trav) { STACK_WIND (frame, stripe_stack_unwind_buf_cbk, trav->xlator, trav->xlator->fops->stat, loc); trav = trav->next; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -