diff options
Diffstat (limited to 'xlators/storage/posix/src/posix-common.c')
| -rw-r--r-- | xlators/storage/posix/src/posix-common.c | 250 |
1 files changed, 203 insertions, 47 deletions
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c index f0d8e3fe0c2..f10722ec3fb 100644 --- a/xlators/storage/posix/src/posix-common.c +++ b/xlators/storage/posix/src/posix-common.c @@ -26,7 +26,6 @@ #include <signal.h> #include <sys/uio.h> #include <unistd.h> -#include <ftw.h> #ifndef GF_BSD_HOST_OS #include <alloca.h> @@ -36,15 +35,7 @@ #include <fcntl.h> #endif /* HAVE_LINKAT */ -#include <glusterfs/glusterfs.h> -#include <glusterfs/checksum.h> -#include <glusterfs/dict.h> -#include <glusterfs/logging.h> -#include "posix.h" #include "posix-inode-handle.h" -#include <glusterfs/xlator.h> -#include <glusterfs/defaults.h> -#include <glusterfs/common-utils.h> #include <glusterfs/compat-errno.h> #include <glusterfs/compat.h> #include <glusterfs/byte-order.h> @@ -53,7 +44,6 @@ #include <glusterfs/locking.h> #include <glusterfs/timer.h> #include "glusterfs3-xdr.h" -#include <glusterfs/hashfn.h> #include "posix-aio.h" #include <glusterfs/glusterfs-acl.h> #include "posix-messages.h" @@ -111,13 +101,13 @@ posix_priv(xlator_t *this) struct posix_private *priv = NULL; char key_prefix[GF_DUMP_MAX_BUF_LEN]; + if (!this) + return 0; + (void)snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); gf_proc_dump_add_section("%s", key_prefix); - if (!this) - return 0; - priv = this->private; if (!priv) @@ -128,7 +118,6 @@ posix_priv(xlator_t *this) gf_proc_dump_write("max_read", "%" PRId64, GF_ATOMIC_GET(priv->read_value)); gf_proc_dump_write("max_write", "%" PRId64, GF_ATOMIC_GET(priv->write_value)); - gf_proc_dump_write("nr_files", "%" PRId64, GF_ATOMIC_GET(priv->nr_files)); return 0; } @@ -146,16 +135,55 @@ int32_t posix_notify(xlator_t *this, int32_t event, void *data, ...) { xlator_t *victim = data; + struct posix_private *priv = this->private; + int ret = 0; + struct timespec sleep_till = { + 0, + }; + glusterfs_ctx_t *ctx = this->ctx; switch (event) { case GF_EVENT_PARENT_UP: { - /* Tell the parent that posix xlator is up */ + /* Notify the parent that posix xlator is up */ default_notify(this, GF_EVENT_CHILD_UP, data); } break; case GF_EVENT_PARENT_DOWN: { if (!victim->cleanup_starting) break; + + if (priv->janitor) { + pthread_mutex_lock(&priv->janitor_mutex); + { + priv->janitor_task_stop = _gf_true; + ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, + priv->janitor); + if (!ret) { + timespec_now_realtime(&sleep_till); + sleep_till.tv_sec += 1; + /* Wait to set janitor_task flag to _gf_false by + * janitor_task_done */ + while (priv->janitor_task_stop) { + (void)pthread_cond_timedwait(&priv->janitor_cond, + &priv->janitor_mutex, + &sleep_till); + timespec_now_realtime(&sleep_till); + sleep_till.tv_sec += 1; + } + } + } + pthread_mutex_unlock(&priv->janitor_mutex); + GF_FREE(priv->janitor); + } + priv->janitor = NULL; + pthread_mutex_lock(&ctx->fd_lock); + { + while (priv->rel_fdcount > 0) { + pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock); + } + } + pthread_mutex_unlock(&ctx->fd_lock); + gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", victim->name); default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); @@ -345,11 +373,20 @@ posix_reconfigure(xlator_t *this, dict_t *options) " fallback to <hostname>:<export>"); } - GF_OPTION_RECONF("reserve", priv->disk_reserve, options, uint32, out); + GF_OPTION_RECONF("reserve", priv->disk_reserve, options, percent_or_size, + out); + /* option can be any one of percent or bytes */ + priv->disk_unit = 0; + if (priv->disk_reserve < 100.0) + priv->disk_unit = 'p'; + if (priv->disk_reserve) { ret = posix_spawn_disk_space_check_thread(this); - if (ret) + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, + "Getting disk space check from thread failed"); goto out; + } } GF_OPTION_RECONF("health-check-interval", priv->health_check_interval, @@ -515,6 +552,30 @@ posix_create_unlink_dir(xlator_t *this) return 0; } +int +posix_create_open_directory_based_fd(xlator_t *this, int pdirfd, char *dir_name) +{ + int ret = -1; + + ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0); + if (ret < 0 && errno == ENOENT) { + ret = sys_mkdirat(pdirfd, dir_name, 0700); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "Creating directory %s failed", dir_name); + goto out; + } + ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0); + if (ret < 0 && errno != EEXIST) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error mkdir hash-1 %s ", dir_name); + goto out; + } + } +out: + return ret; +} + /** * init - */ @@ -541,7 +602,7 @@ posix_init(xlator_t *this) uuid_t gfid = { 0, }; - uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; char *guuid = NULL; int32_t uid = -1; int32_t gid = -1; @@ -551,6 +612,15 @@ posix_init(xlator_t *this) int force_directory = -1; int create_mask = -1; int create_directory_mask = -1; + char dir_handle[PATH_MAX] = { + 0, + }; + int i; + char fhash[4] = { + 0, + }; + int hdirfd = -1; + char value; dir_data = dict_get(this->options, "directory"); @@ -591,7 +661,12 @@ posix_init(xlator_t *this) } _private->base_path = gf_strdup(dir_data->data); - _private->base_path_length = strlen(_private->base_path); + _private->base_path_length = dir_data->len - 1; + + _private->dirfd = -1; + _private->mount_lock = -1; + for (i = 0; i < 256; i++) + _private->arrdfd[i] = -1; ret = dict_get_str(this->options, "hostname", &_private->hostname); if (ret) { @@ -607,16 +682,11 @@ posix_init(xlator_t *this) } /* Check for Extended attribute support, if not present, log it */ - op_ret = sys_lsetxattr(dir_data->data, "trusted.glusterfs.test", "working", - 8, 0); - if (op_ret != -1) { - ret = sys_lremovexattr(dir_data->data, "trusted.glusterfs.test"); - if (ret) { - gf_msg(this->name, GF_LOG_DEBUG, errno, P_MSG_INVALID_OPTION, - "failed to remove xattr: " - "trusted.glusterfs.test"); - } - } else { + size = sys_lgetxattr(dir_data->data, "user.x", &value, sizeof(value)); + + if ((size == -1) && (errno == EOPNOTSUPP)) { + gf_msg(this->name, GF_LOG_DEBUG, 0, P_MSG_XDATA_GETXATTR, + "getxattr returned %zd", size); tmp_data = dict_get(this->options, "mandate-attribute"); if (tmp_data) { if (gf_string2boolean(tmp_data->data, &tmp_bool) == -1) { @@ -776,7 +846,6 @@ posix_init(xlator_t *this) } LOCK_INIT(&_private->lock); - GF_ATOMIC_INIT(_private->nr_files, 0); GF_ATOMIC_INIT(_private->read_value, 0); GF_ATOMIC_INIT(_private->write_value, 0); @@ -866,8 +935,9 @@ posix_init(xlator_t *this) /* performing open dir on brick dir locks the brick dir * and prevents it from being unmounted */ - _private->mount_lock = sys_opendir(dir_data->data); - if (!_private->mount_lock) { + _private->mount_lock = sys_open(dir_data->data, (O_DIRECTORY | O_RDONLY), + 0); + if (_private->mount_lock < 0) { ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DIR_OPERATION_FAILED, @@ -911,6 +981,28 @@ posix_init(xlator_t *this) } this->private = (void *)_private; + snprintf(dir_handle, sizeof(dir_handle), "%s/%s", _private->base_path, + GF_HIDDEN_PATH); + hdirfd = posix_create_open_directory_based_fd(this, _private->mount_lock, + dir_handle); + if (hdirfd < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error open directory failed for dir %s", dir_handle); + ret = -1; + goto out; + } + _private->dirfd = hdirfd; + for (i = 0; i < 256; i++) { + snprintf(fhash, sizeof(fhash), "%02x", i); + _private->arrdfd[i] = posix_create_open_directory_based_fd(this, hdirfd, + fhash); + if (_private->arrdfd[i] < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error openat failed for file %s", fhash); + ret = -1; + goto out; + } + } op_ret = posix_handle_init(this); if (op_ret == -1) { @@ -968,11 +1060,21 @@ posix_init(xlator_t *this) _private->disk_space_check_active = _gf_false; _private->disk_space_full = 0; - GF_OPTION_INIT("reserve", _private->disk_reserve, uint32, out); + + GF_OPTION_INIT("reserve", _private->disk_reserve, percent_or_size, out); + + /* option can be any one of percent or bytes */ + _private->disk_unit = 0; + if (_private->disk_reserve < 100.0) + _private->disk_unit = 'p'; + if (_private->disk_reserve) { ret = posix_spawn_disk_space_check_thread(this); - if (ret) + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, + "Getting disk space check from thread failed "); goto out; + } } _private->health_check_active = _gf_false; @@ -989,7 +1091,11 @@ posix_init(xlator_t *this) pthread_mutex_init(&_private->fsync_mutex, NULL); pthread_cond_init(&_private->fsync_cond, NULL); + pthread_mutex_init(&_private->janitor_mutex, NULL); + pthread_cond_init(&_private->janitor_cond, NULL); + pthread_cond_init(&_private->fd_cond, NULL); INIT_LIST_HEAD(&_private->fsyncs); + _private->rel_fdcount = 0; ret = posix_spawn_ctx_janitor_thread(this); if (ret) goto out; @@ -1066,9 +1172,27 @@ posix_init(xlator_t *this) out); GF_OPTION_INIT("ctime", _private->ctime, bool, out); + out: if (ret) { if (_private) { + if (_private->dirfd >= 0) { + sys_close(_private->dirfd); + _private->dirfd = -1; + } + + for (i = 0; i < 256; i++) { + if (_private->arrdfd[i] >= 0) { + sys_close(_private->arrdfd[i]); + _private->arrdfd[i] = -1; + } + } + /*unlock brick dir*/ + if (_private->mount_lock >= 0) { + (void)sys_close(_private->mount_lock); + _private->mount_lock = -1; + } + GF_FREE(_private->base_path); GF_FREE(_private->hostname); @@ -1088,7 +1212,10 @@ posix_fini(xlator_t *this) { struct posix_private *priv = this->private; gf_boolean_t health_check = _gf_false; + glusterfs_ctx_t *ctx = this->ctx; + uint32_t count; int ret = 0; + int i = 0; if (!priv) return; @@ -1099,6 +1226,18 @@ posix_fini(xlator_t *this) } UNLOCK(&priv->lock); + if (priv->dirfd >= 0) { + sys_close(priv->dirfd); + priv->dirfd = -1; + } + + for (i = 0; i < 256; i++) { + if (priv->arrdfd[i] >= 0) { + sys_close(priv->arrdfd[i]); + priv->arrdfd[i] = -1; + } + } + if (health_check) { (void)gf_thread_cleanup_xint(priv->health_check); priv->health_check = 0; @@ -1109,6 +1248,7 @@ posix_fini(xlator_t *this) (void)gf_thread_cleanup_xint(priv->disk_space_check); priv->disk_space_check = 0; } + if (priv->janitor) { /*TODO: Make sure the synctask is also complete */ ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor); @@ -1116,19 +1256,39 @@ posix_fini(xlator_t *this) gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED, "Failed to delete janitor timer"); } + GF_FREE(priv->janitor); priv->janitor = NULL; } + + pthread_mutex_lock(&ctx->fd_lock); + { + count = --ctx->pxl_count; + if (count == 0) { + pthread_cond_signal(&ctx->fd_cond); + } + } + pthread_mutex_unlock(&ctx->fd_lock); + + if (count == 0) { + pthread_join(ctx->janitor, NULL); + } + if (priv->fsyncer) { (void)gf_thread_cleanup_xint(priv->fsyncer); priv->fsyncer = 0; } /*unlock brick dir*/ - if (priv->mount_lock) - (void)sys_closedir(priv->mount_lock); + if (priv->mount_lock >= 0) { + (void)sys_close(priv->mount_lock); + priv->mount_lock = -1; + } GF_FREE(priv->base_path); LOCK_DESTROY(&priv->lock); pthread_mutex_destroy(&priv->fsync_mutex); + pthread_cond_destroy(&priv->fsync_cond); + pthread_mutex_destroy(&priv->janitor_mutex); + pthread_cond_destroy(&priv->janitor_cond); GF_FREE(priv->hostname); GF_FREE(priv->trash_path); GF_FREE(priv); @@ -1200,7 +1360,7 @@ struct volume_options posix_options[] = { {.key = {"health-check-timeout"}, .type = GF_OPTION_TYPE_INT, .min = 0, - .default_value = "10", + .default_value = "20", .validate = GF_OPT_VALIDATE_MIN, .description = "Interval in seconds to wait aio_write finish for health check, " @@ -1208,11 +1368,11 @@ struct volume_options posix_options[] = { .op_version = {GD_OP_VERSION_4_0_0}, .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, {.key = {"reserve"}, - .type = GF_OPTION_TYPE_INT, + .type = GF_OPTION_TYPE_PERCENT_OR_SIZET, .min = 0, .default_value = "1", .validate = GF_OPT_VALIDATE_MIN, - .description = "Percentage of disk space to be reserved." + .description = "Percentage/Size of disk space to be reserved." " Set to 0 to disable", .op_version = {GD_OP_VERSION_3_13_0}, .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, @@ -1306,24 +1466,21 @@ struct volume_options posix_options[] = { .min = 0000, .max = 0777, .default_value = "0000", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Mode bit permission that will always be set on a file."}, {.key = {"force-directory-mode"}, .type = GF_OPTION_TYPE_INT, .min = 0000, .max = 0777, .default_value = "0000", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Mode bit permission that will be always set on directory"}, {.key = {"create-mask"}, .type = GF_OPTION_TYPE_INT, .min = 0000, .max = 0777, .default_value = "0777", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Any bit not set here will be removed from the" "modes set on a file when it is created"}, {.key = {"create-directory-mask"}, @@ -1331,8 +1488,7 @@ struct volume_options posix_options[] = { .min = 0000, .max = 0777, .default_value = "0777", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Any bit not set here will be removed from the" "modes set on a directory when it is created"}, {.key = {"max-hardlinks"}, |
