📄 posix-locks.c
字号:
/* Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> This file is part of GlusterFS. GlusterFS is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. GlusterFS is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.*/#include <unistd.h>#include <fcntl.h>#include <limits.h>#include <pthread.h>#ifndef _CONFIG_H#define _CONFIG_H#include "config.h"#endif#include "glusterfs.h"#include "compat.h"#include "xlator.h"#include "inode.h"#include "logging.h"#include "common-utils.h"#include "posix-locks.h"#ifndef LLONG_MAX#define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */#endif /* LLONG_MAX *//* Forward declarations */static posix_lock_t * delete_lock (pl_inode_t *, posix_lock_t *);static pl_rw_req_t * delete_rw_req (pl_inode_t *, pl_rw_req_t *);static void destroy_lock (posix_lock_t *);static void do_blocked_rw (pl_inode_t *);static int rw_allowable (pl_inode_t *, posix_lock_t *, rw_op_t);#ifdef _POSIX_LOCKS_DEBUGstatic voidprint_lock (posix_lock_t *lock){ switch (lock->fl_type) { case F_RDLCK: printf ("READ"); break; case F_WRLCK: printf ("WRITE"); break; case F_UNLCK: printf ("UNLOCK"); break; } printf (" (%u, ", lock->fl_start); printf ("%u), ", lock->fl_end); printf ("pid = %lu\n", lock->client_pid); fflush (stdout);}static voidprint_flock (struct flock *lock){ switch (lock->l_type) { case F_RDLCK: printf ("READ"); break; case F_WRLCK: printf ("WRITE"); break; case F_UNLCK: printf ("UNLOCK"); break; } printf (" (%u, ", lock->l_start); printf ("%u), ", lock->l_start+lock->l_len); printf ("pid = %lu\n", lock->l_pid); fflush (stdout);}#endif /* _POSIX_LOCKS_DEBUG *//* Insert an rw request into the inode's rw list */static pl_rw_req_t *insert_rw_req (pl_inode_t *inode, pl_rw_req_t *rw){ rw->next = inode->rw_reqs; rw->prev = NULL; if (inode->rw_reqs) inode->rw_reqs->prev = rw; inode->rw_reqs = rw; return rw;}/* Delete an rw request from the inode's rw list */static pl_rw_req_t *delete_rw_req (pl_inode_t *inode, pl_rw_req_t *rw){ if (rw == inode->rw_reqs) { inode->rw_reqs = rw->next; if (inode->rw_reqs) inode->rw_reqs->prev = NULL; } else { pl_rw_req_t *prev = rw->prev; if (prev) prev->next = rw->next; if (rw->next) rw->next->prev = prev; } return rw;}/* Create a new posix_lock_t */static posix_lock_t *new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid){ posix_lock_t *lock = (posix_lock_t *)calloc (1, sizeof (posix_lock_t)); lock->fl_start = flock->l_start; lock->fl_type = flock->l_type; if (flock->l_len == 0) lock->fl_end = LLONG_MAX; else lock->fl_end = flock->l_start + flock->l_len - 1; lock->transport = transport; lock->client_pid = client_pid; return lock;}/* Destroy a posix_lock */static voiddestroy_lock (posix_lock_t *lock){ if (lock->user_flock) free (lock->user_flock); free (lock);}/* Convert a posix_lock to a struct flock */static voidposix_lock_to_flock (posix_lock_t *lock, struct flock *flock){ flock->l_start = lock->fl_start; flock->l_type = lock->fl_type; flock->l_len = lock->fl_end == LLONG_MAX ? 0 : lock->fl_end - lock->fl_start + 1; flock->l_pid = lock->client_pid;}/* Insert the lock into the inode's lock list */static posix_lock_t *insert_lock (pl_inode_t *inode, posix_lock_t *lock){ posix_lock_t *l, *prev; if (inode->locks) { prev = inode->locks; l = prev->next; while (l) { prev = l; l = l->next; } prev->next = lock; lock->prev = prev; lock->next = NULL; } else { inode->locks = lock; lock->prev = NULL; lock->next = NULL; } return lock;}/* Delete a lock from the inode's lock list */static posix_lock_t *delete_lock (pl_inode_t *inode, posix_lock_t *lock){ if (lock == inode->locks) { inode->locks = lock->next; if (inode->locks) inode->locks->prev = NULL; } else { posix_lock_t *prev = lock->prev; if (prev) prev->next = lock->next; if (lock->next) lock->next->prev = prev; } return lock;}/* Return true if the locks overlap, false otherwise */static intlocks_overlap (posix_lock_t *l1, posix_lock_t *l2){ /* Note: FUSE always gives us absolute offsets, so no need to worry about SEEK_CUR or SEEK_END */ return ((l1->fl_end >= l2->fl_start) && (l2->fl_end >= l1->fl_start));}/* Return true if the locks have the same owner */static intsame_owner (posix_lock_t *l1, posix_lock_t *l2){ return ((l1->client_pid == l2->client_pid) && (l1->transport == l2->transport));}/* Delete all F_UNLCK locks */static voiddelete_unlck_locks (pl_inode_t *inode){ posix_lock_t *l = inode->locks; while (l) { if (l->fl_type == F_UNLCK) { delete_lock (inode, l); destroy_lock (l); } l = l->next; }}/* Add two locks */static posix_lock_t *add_locks (posix_lock_t *l1, posix_lock_t *l2){ posix_lock_t *sum = calloc (1, sizeof (posix_lock_t)); sum->fl_start = min (l1->fl_start, l2->fl_start); sum->fl_end = max (l1->fl_end, l2->fl_end); return sum;}/* Subtract two locks */struct _values { posix_lock_t *locks[3];};/* {small} must always be contained inside {big} */static struct _valuessubtract_locks (posix_lock_t *big, posix_lock_t *small){ struct _values v = { .locks = {0, 0, 0} }; if ((big->fl_start == small->fl_start) && (big->fl_end == small->fl_end)) { /* both edges coincide with big */ v.locks[0] = calloc (1, sizeof (posix_lock_t)); memcpy (v.locks[0], big, sizeof (posix_lock_t)); v.locks[0]->fl_type = small->fl_type; } else if ((small->fl_start > big->fl_start) && (small->fl_end < big->fl_end)) { /* both edges lie inside big */ v.locks[0] = calloc (1, sizeof (posix_lock_t)); v.locks[1] = calloc (1, sizeof (posix_lock_t)); v.locks[2] = calloc (1, sizeof (posix_lock_t)); memcpy (v.locks[0], big, sizeof (posix_lock_t)); v.locks[0]->fl_end = small->fl_start - 1; memcpy (v.locks[1], small, sizeof (posix_lock_t)); memcpy (v.locks[2], big, sizeof (posix_lock_t)); v.locks[2]->fl_start = small->fl_end + 1; } /* one edge coincides with big */ else if (small->fl_start == big->fl_start) { v.locks[0] = calloc (1, sizeof (posix_lock_t)); v.locks[1] = calloc (1, sizeof (posix_lock_t)); memcpy (v.locks[0], big, sizeof (posix_lock_t)); v.locks[0]->fl_start = small->fl_end + 1; memcpy (v.locks[1], small, sizeof (posix_lock_t)); } else if (small->fl_end == big->fl_end) { v.locks[0] = calloc (1, sizeof (posix_lock_t)); v.locks[1] = calloc (1, sizeof (posix_lock_t)); memcpy (v.locks[0], big, sizeof (posix_lock_t)); v.locks[0]->fl_end = small->fl_start - 1; memcpy (v.locks[1], small, sizeof (posix_lock_t)); } else { gf_log ("posix-locks", GF_LOG_DEBUG, "unexpected case in subtract_locks"); } return v;}/* Start searching from {begin}, and return the first lock that conflicts, NULL if no conflict If {begin} is NULL, then start from the beginning of the list*/static posix_lock_t *first_overlap (pl_inode_t *inode, posix_lock_t *lock, posix_lock_t *begin){ posix_lock_t *l; if (!begin) return NULL; l = begin; while (l) { if (l->blocked) { l = l->next; continue; } if (locks_overlap (l, lock)) return l; l = l->next; } return NULL;}static voidgrant_blocked_locks (pl_inode_t *inode){ posix_lock_t *l = inode->locks; while (l) { if (l->blocked) { posix_lock_t *conf = first_overlap (inode, l, inode->locks); if (conf == NULL) { l->blocked = 0; posix_lock_to_flock (l, l->user_flock);#ifdef _POSIX_LOCKS_DEBUG printf ("[UNBLOCKING] "); print_lock (l);#endif STACK_UNWIND (l->frame, 0, 0, l->user_flock); } } l = l->next; }}static posix_lock_t *posix_getlk (pl_inode_t *inode, posix_lock_t *lock){ posix_lock_t *conf = first_overlap (inode, lock, inode->locks); if (conf == NULL) { lock->fl_type = F_UNLCK; return lock; } return conf;}/* Return true if lock is grantable */static intlock_grantable (pl_inode_t *inode, posix_lock_t *lock){ posix_lock_t *l = inode->locks; while (l) { if (!l->blocked && locks_overlap (lock, l)) { if (((l->fl_type == F_WRLCK) || (lock->fl_type == F_WRLCK)) && (lock->fl_type != F_UNLCK) && !same_owner (l, lock)) { return 0; } } l = l->next; } return 1;}static voidinsert_and_merge (pl_inode_t *inode, posix_lock_t *lock){ posix_lock_t *conf = first_overlap (inode, lock, inode->locks); while (conf) { if (same_owner (conf, lock)) { if (conf->fl_type == lock->fl_type) { posix_lock_t *sum = add_locks (lock, conf); sum->fl_type = lock->fl_type; sum->transport = lock->transport; sum->client_pid = lock->client_pid; delete_lock (inode, conf); destroy_lock (conf); destroy_lock (lock); insert_and_merge (inode, sum); return; } else { posix_lock_t *sum = add_locks (lock, conf); int i; sum->fl_type = conf->fl_type; sum->transport = conf->transport; sum->client_pid = conf->client_pid; struct _values v = subtract_locks (sum, lock); delete_lock (inode, conf); destroy_lock (conf); for (i = 0; i < 3; i++) { if (v.locks[i]) { insert_and_merge (inode, v.locks[i]); } } delete_unlck_locks (inode); do_blocked_rw (inode); grant_blocked_locks (inode); return; } } if (lock->fl_type == F_UNLCK) { conf = first_overlap (inode, lock, conf->next); continue; } if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) { insert_lock (inode, lock); return; } } /* no conflicts, so just insert */ if (lock->fl_type != F_UNLCK) { insert_lock (inode, lock); }}intposix_setlk (pl_inode_t *inode, posix_lock_t *lock, int can_block){ errno = 0; if (lock_grantable (inode, lock)) { insert_and_merge (inode, lock); } else if (can_block) {#ifdef _POSIX_LOCKS_DEBUG printf ("[BLOCKING]: "); print_lock (lock);#endif lock->blocked = 1; insert_lock (inode, lock); return -1; } else { errno = EAGAIN; return -1; } return 0;}/* fops */struct _truncate_ops { void *loc_or_fd; off_t offset; enum {TRUNCATE, FTRUNCATE} op;};int32_tpl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct stat *buf){ STACK_UNWIND (frame, op_ret, op_errno, buf); return 0;}int truncate_allowed (pl_inode_t *inode, transport_t *transport, pid_t client_pid, off_t offset){ posix_lock_t *region = calloc (1, sizeof (posix_lock_t)); region->fl_start = offset; region->fl_end = LLONG_MAX; region->transport = transport; region->client_pid = client_pid; posix_lock_t *l = inode->locks; while (l) { if (!l->blocked && locks_overlap (region, l) && !same_owner (region, l)) { free (region); return 0; } l = l->next; } free (region); return 1;}static int32_ttruncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct stat *buf){ posix_locks_private_t *priv = (posix_locks_private_t *)this->private; struct _truncate_ops *local = (struct _truncate_ops *)frame->local; dict_t *inode_ctx; if (op_ret != 0) { gf_log (this->name, GF_LOG_ERROR, "got errno %d from child", op_errno); STACK_UNWIND (frame, -1, op_errno, buf); return 0; } if (local->op == TRUNCATE) inode_ctx = ((loc_t *)local->loc_or_fd)->inode->ctx; else inode_ctx = ((fd_t *)local->loc_or_fd)->inode->ctx; data_t *inode_data = dict_get (inode_ctx, this->name); pl_inode_t *inode; if (inode_data == NULL) { mode_t st_mode; inode = calloc (1, sizeof (pl_inode_t)); if (local->op == TRUNCATE) st_mode = ((loc_t *)local->loc_or_fd)->inode->st_mode; else st_mode = ((fd_t *)local->loc_or_fd)->inode->st_mode; if ((st_mode & S_ISGID) && !(st_mode & S_IXGRP)) inode->mandatory = 1; dict_set (inode_ctx, this->name, bin_to_data (inode, sizeof (inode))); } else { inode = (pl_inode_t *)data_to_bin (inode_data); } if (inode && priv->mandatory && inode->mandatory && !truncate_allowed (inode, frame->root->trans, frame->root->pid, local->offset)) { gf_log (this->name, GF_LOG_ERROR, "returning EAGAIN"); STACK_UNWIND (frame, -1, EAGAIN, buf); return 0; } switch (local->op) { case TRUNCATE: STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate, (loc_t *)local->loc_or_fd, local->offset); break; case FTRUNCATE: STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate, (fd_t *)local->loc_or_fd, local->offset); break; } return 0;}int32_t pl_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset){ GF_ERROR_IF_NULL (this); struct _truncate_ops *local = calloc (1, sizeof (struct _truncate_ops)); local->loc_or_fd = loc; local->offset = offset; local->op = TRUNCATE; frame->local = local; STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->stat, loc); return 0;}int32_t pl_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset){ struct _truncate_ops *local = calloc (1, sizeof (struct _truncate_ops)); local->loc_or_fd = fd; local->offset = offset; local->op = FTRUNCATE; frame->local = local; STACK_WIND (frame, truncate_stat_cbk,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -