summaryrefslogtreecommitdiffstats
path: root/xlators/storage/posix/src/posix-common.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/storage/posix/src/posix-common.c')
-rw-r--r--xlators/storage/posix/src/posix-common.c250
1 files changed, 203 insertions, 47 deletions
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
index f0d8e3fe0c2..f10722ec3fb 100644
--- a/xlators/storage/posix/src/posix-common.c
+++ b/xlators/storage/posix/src/posix-common.c
@@ -26,7 +26,6 @@
#include <signal.h>
#include <sys/uio.h>
#include <unistd.h>
-#include <ftw.h>
#ifndef GF_BSD_HOST_OS
#include <alloca.h>
@@ -36,15 +35,7 @@
#include <fcntl.h>
#endif /* HAVE_LINKAT */
-#include <glusterfs/glusterfs.h>
-#include <glusterfs/checksum.h>
-#include <glusterfs/dict.h>
-#include <glusterfs/logging.h>
-#include "posix.h"
#include "posix-inode-handle.h"
-#include <glusterfs/xlator.h>
-#include <glusterfs/defaults.h>
-#include <glusterfs/common-utils.h>
#include <glusterfs/compat-errno.h>
#include <glusterfs/compat.h>
#include <glusterfs/byte-order.h>
@@ -53,7 +44,6 @@
#include <glusterfs/locking.h>
#include <glusterfs/timer.h>
#include "glusterfs3-xdr.h"
-#include <glusterfs/hashfn.h>
#include "posix-aio.h"
#include <glusterfs/glusterfs-acl.h>
#include "posix-messages.h"
@@ -111,13 +101,13 @@ posix_priv(xlator_t *this)
struct posix_private *priv = NULL;
char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ if (!this)
+ return 0;
+
(void)snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type,
this->name);
gf_proc_dump_add_section("%s", key_prefix);
- if (!this)
- return 0;
-
priv = this->private;
if (!priv)
@@ -128,7 +118,6 @@ posix_priv(xlator_t *this)
gf_proc_dump_write("max_read", "%" PRId64, GF_ATOMIC_GET(priv->read_value));
gf_proc_dump_write("max_write", "%" PRId64,
GF_ATOMIC_GET(priv->write_value));
- gf_proc_dump_write("nr_files", "%" PRId64, GF_ATOMIC_GET(priv->nr_files));
return 0;
}
@@ -146,16 +135,55 @@ int32_t
posix_notify(xlator_t *this, int32_t event, void *data, ...)
{
xlator_t *victim = data;
+ struct posix_private *priv = this->private;
+ int ret = 0;
+ struct timespec sleep_till = {
+ 0,
+ };
+ glusterfs_ctx_t *ctx = this->ctx;
switch (event) {
case GF_EVENT_PARENT_UP: {
- /* Tell the parent that posix xlator is up */
+ /* Notify the parent that posix xlator is up */
default_notify(this, GF_EVENT_CHILD_UP, data);
} break;
case GF_EVENT_PARENT_DOWN: {
if (!victim->cleanup_starting)
break;
+
+ if (priv->janitor) {
+ pthread_mutex_lock(&priv->janitor_mutex);
+ {
+ priv->janitor_task_stop = _gf_true;
+ ret = gf_tw_del_timer(this->ctx->tw->timer_wheel,
+ priv->janitor);
+ if (!ret) {
+ timespec_now_realtime(&sleep_till);
+ sleep_till.tv_sec += 1;
+ /* Wait to set janitor_task flag to _gf_false by
+ * janitor_task_done */
+ while (priv->janitor_task_stop) {
+ (void)pthread_cond_timedwait(&priv->janitor_cond,
+ &priv->janitor_mutex,
+ &sleep_till);
+ timespec_now_realtime(&sleep_till);
+ sleep_till.tv_sec += 1;
+ }
+ }
+ }
+ pthread_mutex_unlock(&priv->janitor_mutex);
+ GF_FREE(priv->janitor);
+ }
+ priv->janitor = NULL;
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ while (priv->rel_fdcount > 0) {
+ pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock);
+ }
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+
gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
victim->name);
default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
@@ -345,11 +373,20 @@ posix_reconfigure(xlator_t *this, dict_t *options)
" fallback to <hostname>:<export>");
}
- GF_OPTION_RECONF("reserve", priv->disk_reserve, options, uint32, out);
+ GF_OPTION_RECONF("reserve", priv->disk_reserve, options, percent_or_size,
+ out);
+ /* option can be any one of percent or bytes */
+ priv->disk_unit = 0;
+ if (priv->disk_reserve < 100.0)
+ priv->disk_unit = 'p';
+
if (priv->disk_reserve) {
ret = posix_spawn_disk_space_check_thread(this);
- if (ret)
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED,
+ "Getting disk space check from thread failed");
goto out;
+ }
}
GF_OPTION_RECONF("health-check-interval", priv->health_check_interval,
@@ -515,6 +552,30 @@ posix_create_unlink_dir(xlator_t *this)
return 0;
}
+/**
+ * posix_create_open_directory_based_fd - open a directory, creating it
+ * first when it does not exist yet.
+ *
+ * @this:     xlator whose name is used for the log messages
+ * @pdirfd:   directory fd handed to sys_openat()/sys_mkdirat() as the base
+ * @dir_name: directory to open; created with mode 0700 when missing
+ *
+ * Returns the fd obtained from sys_openat() (opened O_DIRECTORY | O_RDONLY)
+ * on success, or a negative value on failure. The fd is not closed here.
+ */
+int
+posix_create_open_directory_based_fd(xlator_t *this, int pdirfd, char *dir_name)
+{
+    int ret = -1;
+
+    ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0);
+    /* Only a missing directory is handled here; success and every other
+     * failure are returned to the caller unchanged. */
+    if (ret >= 0 || errno != ENOENT)
+        goto out;
+
+    ret = sys_mkdirat(pdirfd, dir_name, 0700);
+    /* EEXIST means the directory appeared between the failed open and the
+     * mkdir; that is not an error, the open below decides the outcome. */
+    if (ret < 0 && errno != EEXIST) {
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
+               "Creating directory %s failed", dir_name);
+        goto out;
+    }
+
+    ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0);
+    if (ret < 0) {
+        /* Log every failure of the re-open so none is returned silently. */
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
+               "Opening directory %s failed", dir_name);
+    }
+out:
+    return ret;
+}
+
+
/**
* init -
*/
@@ -541,7 +602,7 @@ posix_init(xlator_t *this)
uuid_t gfid = {
0,
};
- uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
char *guuid = NULL;
int32_t uid = -1;
int32_t gid = -1;
@@ -551,6 +612,15 @@ posix_init(xlator_t *this)
int force_directory = -1;
int create_mask = -1;
int create_directory_mask = -1;
+ char dir_handle[PATH_MAX] = {
+ 0,
+ };
+ int i;
+ char fhash[4] = {
+ 0,
+ };
+ int hdirfd = -1;
+ char value;
dir_data = dict_get(this->options, "directory");
@@ -591,7 +661,12 @@ posix_init(xlator_t *this)
}
_private->base_path = gf_strdup(dir_data->data);
- _private->base_path_length = strlen(_private->base_path);
+ _private->base_path_length = dir_data->len - 1;
+
+ _private->dirfd = -1;
+ _private->mount_lock = -1;
+ for (i = 0; i < 256; i++)
+ _private->arrdfd[i] = -1;
ret = dict_get_str(this->options, "hostname", &_private->hostname);
if (ret) {
@@ -607,16 +682,11 @@ posix_init(xlator_t *this)
}
/* Check for Extended attribute support, if not present, log it */
- op_ret = sys_lsetxattr(dir_data->data, "trusted.glusterfs.test", "working",
- 8, 0);
- if (op_ret != -1) {
- ret = sys_lremovexattr(dir_data->data, "trusted.glusterfs.test");
- if (ret) {
- gf_msg(this->name, GF_LOG_DEBUG, errno, P_MSG_INVALID_OPTION,
- "failed to remove xattr: "
- "trusted.glusterfs.test");
- }
- } else {
+ size = sys_lgetxattr(dir_data->data, "user.x", &value, sizeof(value));
+
+ if ((size == -1) && (errno == EOPNOTSUPP)) {
+ gf_msg(this->name, GF_LOG_DEBUG, 0, P_MSG_XDATA_GETXATTR,
+ "getxattr returned %zd", size);
tmp_data = dict_get(this->options, "mandate-attribute");
if (tmp_data) {
if (gf_string2boolean(tmp_data->data, &tmp_bool) == -1) {
@@ -776,7 +846,6 @@ posix_init(xlator_t *this)
}
LOCK_INIT(&_private->lock);
- GF_ATOMIC_INIT(_private->nr_files, 0);
GF_ATOMIC_INIT(_private->read_value, 0);
GF_ATOMIC_INIT(_private->write_value, 0);
@@ -866,8 +935,9 @@ posix_init(xlator_t *this)
/* performing open dir on brick dir locks the brick dir
* and prevents it from being unmounted
*/
- _private->mount_lock = sys_opendir(dir_data->data);
- if (!_private->mount_lock) {
+ _private->mount_lock = sys_open(dir_data->data, (O_DIRECTORY | O_RDONLY),
+ 0);
+ if (_private->mount_lock < 0) {
ret = -1;
op_errno = errno;
gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DIR_OPERATION_FAILED,
@@ -911,6 +981,28 @@ posix_init(xlator_t *this)
}
this->private = (void *)_private;
+ snprintf(dir_handle, sizeof(dir_handle), "%s/%s", _private->base_path,
+ GF_HIDDEN_PATH);
+ hdirfd = posix_create_open_directory_based_fd(this, _private->mount_lock,
+ dir_handle);
+ if (hdirfd < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
+ "error open directory failed for dir %s", dir_handle);
+ ret = -1;
+ goto out;
+ }
+ _private->dirfd = hdirfd;
+ for (i = 0; i < 256; i++) {
+ snprintf(fhash, sizeof(fhash), "%02x", i);
+ _private->arrdfd[i] = posix_create_open_directory_based_fd(this, hdirfd,
+ fhash);
+ if (_private->arrdfd[i] < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
+ "error openat failed for file %s", fhash);
+ ret = -1;
+ goto out;
+ }
+ }
op_ret = posix_handle_init(this);
if (op_ret == -1) {
@@ -968,11 +1060,21 @@ posix_init(xlator_t *this)
_private->disk_space_check_active = _gf_false;
_private->disk_space_full = 0;
- GF_OPTION_INIT("reserve", _private->disk_reserve, uint32, out);
+
+ GF_OPTION_INIT("reserve", _private->disk_reserve, percent_or_size, out);
+
+ /* option can be any one of percent or bytes */
+ _private->disk_unit = 0;
+ if (_private->disk_reserve < 100.0)
+ _private->disk_unit = 'p';
+
if (_private->disk_reserve) {
ret = posix_spawn_disk_space_check_thread(this);
- if (ret)
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED,
+ "Getting disk space check from thread failed ");
goto out;
+ }
}
_private->health_check_active = _gf_false;
@@ -989,7 +1091,11 @@ posix_init(xlator_t *this)
pthread_mutex_init(&_private->fsync_mutex, NULL);
pthread_cond_init(&_private->fsync_cond, NULL);
+ pthread_mutex_init(&_private->janitor_mutex, NULL);
+ pthread_cond_init(&_private->janitor_cond, NULL);
+ pthread_cond_init(&_private->fd_cond, NULL);
INIT_LIST_HEAD(&_private->fsyncs);
+ _private->rel_fdcount = 0;
ret = posix_spawn_ctx_janitor_thread(this);
if (ret)
goto out;
@@ -1066,9 +1172,27 @@ posix_init(xlator_t *this)
out);
GF_OPTION_INIT("ctime", _private->ctime, bool, out);
+
out:
if (ret) {
if (_private) {
+ if (_private->dirfd >= 0) {
+ sys_close(_private->dirfd);
+ _private->dirfd = -1;
+ }
+
+ for (i = 0; i < 256; i++) {
+ if (_private->arrdfd[i] >= 0) {
+ sys_close(_private->arrdfd[i]);
+ _private->arrdfd[i] = -1;
+ }
+ }
+ /*unlock brick dir*/
+ if (_private->mount_lock >= 0) {
+ (void)sys_close(_private->mount_lock);
+ _private->mount_lock = -1;
+ }
+
GF_FREE(_private->base_path);
GF_FREE(_private->hostname);
@@ -1088,7 +1212,10 @@ posix_fini(xlator_t *this)
{
struct posix_private *priv = this->private;
gf_boolean_t health_check = _gf_false;
+ glusterfs_ctx_t *ctx = this->ctx;
+ uint32_t count;
int ret = 0;
+ int i = 0;
if (!priv)
return;
@@ -1099,6 +1226,18 @@ posix_fini(xlator_t *this)
}
UNLOCK(&priv->lock);
+ if (priv->dirfd >= 0) {
+ sys_close(priv->dirfd);
+ priv->dirfd = -1;
+ }
+
+ for (i = 0; i < 256; i++) {
+ if (priv->arrdfd[i] >= 0) {
+ sys_close(priv->arrdfd[i]);
+ priv->arrdfd[i] = -1;
+ }
+ }
+
if (health_check) {
(void)gf_thread_cleanup_xint(priv->health_check);
priv->health_check = 0;
@@ -1109,6 +1248,7 @@ posix_fini(xlator_t *this)
(void)gf_thread_cleanup_xint(priv->disk_space_check);
priv->disk_space_check = 0;
}
+
if (priv->janitor) {
/*TODO: Make sure the synctask is also complete */
ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor);
@@ -1116,19 +1256,39 @@ posix_fini(xlator_t *this)
gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED,
"Failed to delete janitor timer");
}
+ GF_FREE(priv->janitor);
priv->janitor = NULL;
}
+
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ count = --ctx->pxl_count;
+ if (count == 0) {
+ pthread_cond_signal(&ctx->fd_cond);
+ }
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ if (count == 0) {
+ pthread_join(ctx->janitor, NULL);
+ }
+
if (priv->fsyncer) {
(void)gf_thread_cleanup_xint(priv->fsyncer);
priv->fsyncer = 0;
}
/*unlock brick dir*/
- if (priv->mount_lock)
- (void)sys_closedir(priv->mount_lock);
+ if (priv->mount_lock >= 0) {
+ (void)sys_close(priv->mount_lock);
+ priv->mount_lock = -1;
+ }
GF_FREE(priv->base_path);
LOCK_DESTROY(&priv->lock);
pthread_mutex_destroy(&priv->fsync_mutex);
+ pthread_cond_destroy(&priv->fsync_cond);
+ pthread_mutex_destroy(&priv->janitor_mutex);
+ pthread_cond_destroy(&priv->janitor_cond);
GF_FREE(priv->hostname);
GF_FREE(priv->trash_path);
GF_FREE(priv);
@@ -1200,7 +1360,7 @@ struct volume_options posix_options[] = {
{.key = {"health-check-timeout"},
.type = GF_OPTION_TYPE_INT,
.min = 0,
- .default_value = "10",
+ .default_value = "20",
.validate = GF_OPT_VALIDATE_MIN,
.description =
"Interval in seconds to wait aio_write finish for health check, "
@@ -1208,11 +1368,11 @@ struct volume_options posix_options[] = {
.op_version = {GD_OP_VERSION_4_0_0},
.flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
{.key = {"reserve"},
- .type = GF_OPTION_TYPE_INT,
+ .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
.min = 0,
.default_value = "1",
.validate = GF_OPT_VALIDATE_MIN,
- .description = "Percentage of disk space to be reserved."
+ .description = "Percentage/Size of disk space to be reserved."
" Set to 0 to disable",
.op_version = {GD_OP_VERSION_3_13_0},
.flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
@@ -1306,24 +1466,21 @@ struct volume_options posix_options[] = {
.min = 0000,
.max = 0777,
.default_value = "0000",
- .validate = GF_OPT_VALIDATE_MIN,
- .validate = GF_OPT_VALIDATE_MAX,
+ .validate = GF_OPT_VALIDATE_BOTH,
.description = "Mode bit permission that will always be set on a file."},
{.key = {"force-directory-mode"},
.type = GF_OPTION_TYPE_INT,
.min = 0000,
.max = 0777,
.default_value = "0000",
- .validate = GF_OPT_VALIDATE_MIN,
- .validate = GF_OPT_VALIDATE_MAX,
+ .validate = GF_OPT_VALIDATE_BOTH,
.description = "Mode bit permission that will be always set on directory"},
{.key = {"create-mask"},
.type = GF_OPTION_TYPE_INT,
.min = 0000,
.max = 0777,
.default_value = "0777",
- .validate = GF_OPT_VALIDATE_MIN,
- .validate = GF_OPT_VALIDATE_MAX,
+ .validate = GF_OPT_VALIDATE_BOTH,
.description = "Any bit not set here will be removed from the"
"modes set on a file when it is created"},
{.key = {"create-directory-mask"},
@@ -1331,8 +1488,7 @@ struct volume_options posix_options[] = {
.min = 0000,
.max = 0777,
.default_value = "0777",
- .validate = GF_OPT_VALIDATE_MIN,
- .validate = GF_OPT_VALIDATE_MAX,
+ .validate = GF_OPT_VALIDATE_BOTH,
.description = "Any bit not set here will be removed from the"
"modes set on a directory when it is created"},
{.key = {"max-hardlinks"},