diff options
Diffstat (limited to 'xlators/cluster/dht/src/dht-common.c')
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 1930 |
1 files changed, 1029 insertions, 901 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index f9e7141604a..8ba0cc4c732 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -10,48 +10,52 @@ /* TODO: add NS locking */ -#include <glusterfs/glusterfs.h> -#include <glusterfs/xlator.h> #include "libxlator.h" #include "dht-common.h" #include "dht-lock.h" -#include <glusterfs/defaults.h> #include <glusterfs/byte-order.h> #include <glusterfs/quota-common-utils.h> #include <glusterfs/upcall-utils.h> +#include "glusterfs/compat-errno.h" // for ENODATA on BSD +#include <glusterfs/common-utils.h> #include <sys/time.h> #include <libgen.h> #include <signal.h> -int run_defrag = 0; - -int -dht_link2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); +static int +dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, + dict_t *xdata); -int -dht_removexattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, - int ret); +static int +dht_link2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret); -int -dht_setxattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret); +static int +dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req); -int -dht_rmdir_readdirp_do(call_frame_t *readdirp_frame, xlator_t *this); +static int +dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this); -int -dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata); +static int +dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata); -int -dht_set_file_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req); +static int +dht_rmdir_unlock(call_frame_t *frame, xlator_t *this); -int -dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req); +static const char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL}; -int -dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc); +/* Check the xdata to make sure EBADF has been set by client xlator */ +int32_t +dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno) +{ + if (op_ret == -1 && (op_errno == EBADF || op_errno == EBADFD) && + !(local->fd_checked)) { + return 1; + } + return 0; +} /* Sets the blocks and size values to fixed values. This is to be called * only for dirs. The caller is responsible for checking the type @@ -67,68 +71,17 @@ dht_set_fixed_dir_stat(struct iatt *stat) return -1; } -/* Set both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY - * Use DHT_MODE_IN_XDATA_KEY if available. Else fall back to - * DHT_IATT_IN_XDATA_KEY - */ -int -dht_request_iatt_in_xdata(xlator_t *this, dict_t *xattr_req) -{ - int ret = -1; - - ret = dict_set_int8(xattr_req, DHT_MODE_IN_XDATA_KEY, 1); - ret = dict_set_int8(xattr_req, DHT_IATT_IN_XDATA_KEY, 1); - - /* At least one call succeeded */ - return ret; -} - -/* Get both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY - * Use DHT_MODE_IN_XDATA_KEY if available, else fall back to - * DHT_IATT_IN_XDATA_KEY - * This will return a dummy iatt with only the mode and type set - */ -int -dht_read_iatt_from_xdata(xlator_t *this, dict_t *xdata, struct iatt *stbuf) -{ - int ret = -1; - int32_t mode = 0; - - ret = dict_get_int32(xdata, DHT_MODE_IN_XDATA_KEY, &mode); - - if (ret) { - ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf); - } else { - stbuf->ia_prot = ia_prot_from_st_mode(mode); - stbuf->ia_type = ia_type_from_st_mode(mode); - } - - return ret; -} - -int -dht_rmdir_unlock(call_frame_t *frame, xlator_t *this); - -char *xattrs_to_heal[] = {"user.", - POSIX_ACL_ACCESS_XATTR, - POSIX_ACL_DEFAULT_XATTR, - QUOTA_LIMIT_KEY, - QUOTA_LIMIT_OBJECTS_KEY, - GF_SELINUX_XATTR_KEY, - GF_XATTR_MDATA_KEY, - NULL}; - -char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL}; - /* Return true if key exists in array */ static gf_boolean_t dht_match_xattr(const char *key) { + char **xattrs_to_heal = get_xattrs_to_heal(); + return gf_get_index_by_elem(xattrs_to_heal, (char *)key) >= 0; } -int +static int dht_aggregate_quota_xattr(dict_t *dst, char *key, data_t *value) { int ret = -1; @@ -191,7 +144,7 @@ out: return ret; } -int +static int add_opt(char **optsp, const char *opt) { char *newopts = NULL; @@ -269,7 +222,7 @@ out: */ -int +static int dht_aggregate_split_brain_xattr(dict_t *dst, char *key, data_t *value) { int ret = 0; @@ -368,7 +321,7 @@ out: return ret; } -int +static int dht_aggregate(dict_t *this, char *key, data_t *value, void *data) { dict_t *dst = NULL; @@ -415,7 +368,7 @@ out: return ret; } -void +static void dht_aggregate_xattr(dict_t *dst, dict_t *src) { if ((dst == NULL) || (src == NULL)) { @@ -497,7 +450,7 @@ dht_inode_ctx_mdsvol_get(inode_t *inode, xlator_t *this, xlator_t **mdsvol) - complete linkfile selfheal */ -int +static int dht_lookup_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) { @@ -539,7 +492,7 @@ out: return ret; } -int +static int dht_discover_complete(xlator_t *this, call_frame_t *discover_frame) { dht_local_t *local = NULL; @@ -660,13 +613,14 @@ dht_discover_complete(xlator_t *this, call_frame_t *discover_frame) if (local->need_xattr_heal && !heal_path) { local->need_xattr_heal = 0; - ret = dht_dir_xattr_heal(this, local); - if (ret) - gf_msg(this->name, GF_LOG_ERROR, ret, + ret = dht_dir_xattr_heal(this, local, &op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_DIR_XATTR_HEAL_FAILED, "xattr heal failed for " "directory gfid is %s ", gfid_local); + } } } @@ -727,7 +681,7 @@ out: return ret; } -int +static int dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) { @@ -736,6 +690,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int ret = -1; dht_conf_t *conf = 0; dht_layout_t *layout = NULL; + int32_t mds_heal_fresh_lookup = 0; GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, frame->local, out); @@ -743,6 +698,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; conf = this->private; layout = local->selfheal.layout; + mds_heal_fresh_lookup = local->mds_heal_fresh_lookup; if (op_ret) { gf_msg_debug(this->name, op_ret, @@ -763,11 +719,63 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, layout); } out: - if (local && local->mds_heal_fresh_lookup) + if (mds_heal_fresh_lookup) DHT_STACK_DESTROY(frame); return 0; } +static xlator_t * +dht_inode_get_hashed_subvol(inode_t *inode, xlator_t *this, loc_t *loc) +{ + char *path = NULL; + loc_t populate_loc = { + 0, + }; + char *name = NULL; + xlator_t *hash_subvol = NULL; + + if (!inode) + return hash_subvol; + + if (loc && loc->parent && loc->path) { + if (!loc->name) { + name = strrchr(loc->path, '/'); + if (name) { + loc->name = name + 1; + } else { + goto out; + } + } + hash_subvol = dht_subvol_get_hashed(this, loc); + goto out; + } + + if (!gf_uuid_is_null(inode->gfid)) { + populate_loc.inode = inode_ref(inode); + populate_loc.parent = inode_parent(populate_loc.inode, NULL, NULL); + inode_path(populate_loc.inode, NULL, &path); + + if (!path) + goto out; + + populate_loc.path = path; + if (!populate_loc.name && populate_loc.path) { + name = strrchr(populate_loc.path, '/'); + if (name) { + populate_loc.name = name + 1; + + } else { + goto out; + } + } + hash_subvol = dht_subvol_get_hashed(this, &populate_loc); + } +out: + if (populate_loc.inode) + loc_wipe(&populate_loc); + return hash_subvol; +} + /* Common function call by revalidate/selfheal code path to populate internal xattr if it is not present, mark_during_fresh_lookup value determines either function is call by revalidate_cbk(discover_complete) @@ -853,7 +861,8 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, "Failed to get hashed subvol for path %s" "gfid is %s ", local->loc.path, gfid_local); - (*errst) = 1; + if (errst) + (*errst) = 1; ret = -1; goto out; } @@ -924,7 +933,44 @@ out: return ret; } -int +/* Get the value of key from dict in the bytewise and save in array after + convert from network byte order to host byte order +*/ +static int32_t +dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size, + int *errst) +{ + void *ptr = NULL; + int32_t len = -1; + int32_t vindex = -1; + int32_t err = -1; + int ret = 0; + + if (dict == NULL) { + (*errst) = -1; + return -EINVAL; + } + err = dict_get_ptr_and_len(dict, key, &ptr, &len); + if (err != 0) { + (*errst) = -1; + return err; + } + + if (len != (size * sizeof(int32_t))) { + (*errst) = -1; + return -EINVAL; + } + + for (vindex = 0; vindex < size; vindex++) { + value[vindex] = ntoh32(*((int32_t *)ptr + vindex)); + if (value[vindex] < 0) + ret = -1; + } + + return ret; +} + +static int dht_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xattr, struct iatt *postparent) @@ -1019,7 +1065,7 @@ dht_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, if (local->xattr == NULL) { local->xattr = dict_ref(xattr); } else { - /* Don't aggregate for files. See BZ#1484113 */ + /* Don't aggregate for files. See BZ#1484709 */ if (is_dir) dht_aggregate_xattr(local->xattr, xattr); } @@ -1085,7 +1131,53 @@ out: return 0; } -int +static int +dht_set_file_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req) +{ + int ret = -EINVAL; + dht_conf_t *conf = NULL; + + conf = this->private; + if (!conf) { + goto err; + } + + if (!xattr_req) { + goto err; + } + + /* Used to check whether this is a linkto file. + */ + ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value:key = %s for " + "path %s", + conf->link_xattr_name, loc->path); + goto err; + } + + /* This is used to make sure we don't unlink linkto files + * which are the target of an ongoing file migration. + */ + ret = dict_set_uint32(xattr_req, GLUSTERFS_OPEN_FD_COUNT, 4); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value:key = %s for " + "path %s", + GLUSTERFS_OPEN_FD_COUNT, loc->path); + goto err; + } + + ret = 0; +err: + return ret; +} + +/* This is a gfid based nameless lookup. Without a name, the hashed subvol + * cannot be calculated so a lookup is sent to all subvols. + */ +static int dht_do_discover(call_frame_t *frame, xlator_t *this, loc_t *loc) { int ret; @@ -1099,6 +1191,9 @@ dht_do_discover(call_frame_t *frame, xlator_t *this, loc_t *loc) conf = this->private; local = frame->local; + /* As we do not know if this is a file or directory, request + * both file and directory xattrs + */ ret = dht_set_file_xattr_req(this, loc, local->xattr_req); if (ret) { goto err; @@ -1110,6 +1205,9 @@ dht_do_discover(call_frame_t *frame, xlator_t *this, loc_t *loc) } if (loc_is_root(loc)) { + /* Request the DHT commit hash xattr (trusted.glusterfs.dht.commithash) + * set on the brick root. + */ ret = dict_set_uint32(local->xattr_req, conf->commithash_xattr_name, sizeof(uint32_t)); } @@ -1151,48 +1249,11 @@ err: return 0; } -/* Get the value of key from dict in the bytewise and save in array after - convert from network byte order to host byte order -*/ -int32_t -dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size, - int *errst) -{ - void *ptr = NULL; - int32_t len = -1; - int32_t vindex = -1; - int32_t err = -1; - int ret = 0; - - if (dict == NULL) { - (*errst) = -1; - return -EINVAL; - } - err = dict_get_ptr_and_len(dict, key, &ptr, &len); - if (err != 0) { - (*errst) = -1; - return err; - } - - if (len != (size * sizeof(int32_t))) { - (*errst) = -1; - return -EINVAL; - } - - for (vindex = 0; vindex < size; vindex++) { - value[vindex] = ntoh32(*((int32_t *)ptr + vindex)); - if (value[vindex] < 0) - ret = -1; - } - - return ret; -} - /* Code to call syntask to heal custom xattr from hashed subvol to non hashed subvol */ int -dht_dir_xattr_heal(xlator_t *this, dht_local_t *local) +dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno) { dht_local_t *copy_local = NULL; call_frame_t *copy = NULL; @@ -1204,6 +1265,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local) "No gfid exists for path %s " "so healing xattr is not possible", local->loc.path); + *op_errno = EIO; goto out; } @@ -1217,6 +1279,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local) "Memory allocation failed " "for path %s gfid %s ", local->loc.path, gfid_local); + *op_errno = ENOMEM; DHT_STACK_DESTROY(copy); } else { copy_local->stbuf = local->stbuf; @@ -1231,6 +1294,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local) "Synctask creation failed to heal xattr " "for path %s gfid %s ", local->loc.path, gfid_local); + *op_errno = ENOMEM; DHT_STACK_DESTROY(copy); } } @@ -1239,7 +1303,7 @@ out: return ret; } -int +static int dht_needs_selfheal(call_frame_t *frame, xlator_t *this) { dht_local_t *local = NULL; @@ -1264,6 +1328,26 @@ dht_needs_selfheal(call_frame_t *frame, xlator_t *this) return needs_selfheal; } +static int +is_permission_different(ia_prot_t *prot1, ia_prot_t *prot2) +{ + if ((prot1->owner.read != prot2->owner.read) || + (prot1->owner.write != prot2->owner.write) || + (prot1->owner.exec != prot2->owner.exec) || + (prot1->group.read != prot2->group.read) || + (prot1->group.write != prot2->group.write) || + (prot1->group.exec != prot2->group.exec) || + (prot1->other.read != prot2->other.read) || + (prot1->other.write != prot2->other.write) || + (prot1->other.exec != prot2->other.exec) || + (prot1->suid != prot2->suid) || (prot1->sgid != prot2->sgid) || + (prot1->sticky != prot2->sticky)) { + return 1; + } else { + return 0; + } +} + int dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, @@ -1351,13 +1435,31 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, dht_aggregate_xattr(local->xattr, xattr); } + if (__is_root_gfid(stbuf->ia_gfid)) { + ret = dht_dir_has_layout(xattr, conf->xattr_name); + if (ret >= 0) { + if (is_greater_time(local->prebuf.ia_ctime, + local->prebuf.ia_ctime_nsec, + stbuf->ia_ctime, stbuf->ia_ctime_nsec)) { + /* Choose source */ + local->prebuf.ia_gid = stbuf->ia_gid; + local->prebuf.ia_uid = stbuf->ia_uid; + + local->prebuf.ia_ctime = stbuf->ia_ctime; + local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec; + local->prebuf.ia_prot = stbuf->ia_prot; + } + } + } + if (local->stbuf.ia_type != IA_INVAL) { - /* This is not the first subvol to respond */ - if (!__is_root_gfid(stbuf->ia_gfid) && - ((local->stbuf.ia_gid != stbuf->ia_gid) || - (local->stbuf.ia_uid != stbuf->ia_uid) || - (is_permission_different(&local->stbuf.ia_prot, - &stbuf->ia_prot)))) { + /* This is not the first subvol to respond + * Compare values to see if attrs need to be healed + */ + if ((local->stbuf.ia_gid != stbuf->ia_gid) || + (local->stbuf.ia_uid != stbuf->ia_uid) || + (is_permission_different(&local->stbuf.ia_prot, + &stbuf->ia_prot))) { local->need_attrheal = 1; } } @@ -1377,6 +1479,9 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto unlock; } + /* Save the mds subvol info and stbuf. This is the value that will + * be used for healing + */ local->mds_subvol = prev; local->mds_stbuf = *stbuf; @@ -1390,6 +1495,7 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key, mds_xattr_val, 1, &errst); if ((check_mds < 0) && !errst) { + /* Check if xattrs need to be healed on the directories */ local->mds_xattr = dict_ref(xattr); gf_msg_debug(this->name, 0, "%s: %s is not zero on %s. Xattrs need to be healed." @@ -1474,24 +1580,57 @@ out: return ret; } -int -is_permission_different(ia_prot_t *prot1, ia_prot_t *prot2) +static int +dht_lookup_directory(call_frame_t *frame, xlator_t *this, loc_t *loc) { - if ((prot1->owner.read != prot2->owner.read) || - (prot1->owner.write != prot2->owner.write) || - (prot1->owner.exec != prot2->owner.exec) || - (prot1->group.read != prot2->group.read) || - (prot1->group.write != prot2->group.write) || - (prot1->group.exec != prot2->group.exec) || - (prot1->other.read != prot2->other.read) || - (prot1->other.write != prot2->other.write) || - (prot1->other.exec != prot2->other.exec) || - (prot1->suid != prot2->suid) || (prot1->sgid != prot2->sgid) || - (prot1->sticky != prot2->sticky)) { - return 1; - } else { - return 0; + int call_cnt = 0; + int i = 0; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int ret = 0; + + GF_VALIDATE_OR_GOTO("dht", frame, out); + GF_VALIDATE_OR_GOTO("dht", this, unwind); + GF_VALIDATE_OR_GOTO("dht", frame->local, unwind); + GF_VALIDATE_OR_GOTO("dht", this->private, unwind); + GF_VALIDATE_OR_GOTO("dht", loc, unwind); + + conf = this->private; + local = frame->local; + + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; + + local->layout = dht_layout_new(this, conf->subvolume_cnt); + if (!local->layout) { + goto unwind; + } + + if (local->xattr != NULL) { + dict_unref(local->xattr); + local->xattr = NULL; + } + + if (!gf_uuid_is_null(local->gfid)) { + /* use this gfid in order to heal any missing ones */ + ret = dict_set_gfuuid(local->xattr_req, "gfid-req", local->gfid, true); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "%s: Failed to set dictionary value:" + " key = gfid-req", + local->loc.path); + } + + for (i = 0; i < call_cnt; i++) { + STACK_WIND_COOKIE( + frame, dht_lookup_dir_cbk, conf->subvolumes[i], conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, &local->loc, local->xattr_req); } + return 0; +unwind: + DHT_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL); +out: + return 0; } int @@ -1512,7 +1651,7 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, uint32_t vol_commit_hash = 0; xlator_t *subvol = NULL; int32_t check_mds = 0; - int errst = 0; + int errst = 0, i = 0; int32_t mds_xattr_val[1] = {0}; GF_VALIDATE_OR_GOTO("dht", frame, err); @@ -1526,6 +1665,8 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, conf = this->private; if (!conf->vch_forced) { + /* Update the commithash value if available + */ ret = dict_get_uint32(xattr, conf->commithash_xattr_name, &vol_commit_hash); if (ret == 0) { @@ -1577,11 +1718,21 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local->need_lookup_everywhere = 1; } else if (IA_ISDIR(local->loc.inode->ia_type)) { + layout = local->layout; + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].xlator == prev) { + layout->list[i].err = op_errno; + break; + } + } + local->need_selfheal = 1; } } - /* The GFID is missing on this subvol*/ + /* The GFID is missing on this subvol. Lookup everywhere to force a + * gfid heal + */ if ((op_errno == ENODATA) && (IA_ISDIR(local->loc.inode->ia_type))) { local->need_lookup_everywhere = 1; @@ -1663,6 +1814,8 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local->loc.path, prev->name); } if ((check_mds < 0) && !errst) { + /* Check if xattrs need to be healed on the directory + */ local->mds_xattr = dict_ref(xattr); gf_msg_debug(this->name, 0, "Value of %s is not zero on " @@ -1678,6 +1831,8 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ret = dht_layout_dir_mismatch(this, layout, prev, &local->loc, xattr); if (ret != 0) { + /* In memory layout does not match on-disk layout. + */ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_MISMATCH, "Mismatching layouts for %s, gfid = %s", local->loc.path, gfid); @@ -1704,6 +1859,8 @@ unlock: UNLOCK(&frame->lock); if (follow_link) { + /* Found a linkto file. Follow it to see if the target file exists + */ gf_uuid_copy(local->gfid, stbuf->ia_gfid); subvol = dht_linkfile_subvol(this, inode, stbuf, xattr); @@ -1733,6 +1890,7 @@ unlock: local->need_xattr_heal = 0; if (IA_ISDIR(local->stbuf.ia_type)) { + /* No mds xattr found. Trigger a heal to set it */ if (!__is_root_gfid(local->loc.inode->gfid) && (!dict_get(local->xattr, conf->mds_xattr_key))) local->need_selfheal = 1; @@ -1822,12 +1980,11 @@ err: return ret; } -int -dht_lookup_linkfile_create_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +static int +dht_lookup_linkfile_create_cbk(call_frame_t *frame, void *cooie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { dht_local_t *local = NULL; xlator_t *cached_subvol = NULL; @@ -1889,7 +2046,7 @@ out: return ret; } -int +static int dht_lookup_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) @@ -1915,7 +2072,7 @@ dht_lookup_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, return 0; } -int +static int dht_lookup_unlink_of_false_linkto_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *preparent, @@ -1967,7 +2124,7 @@ dht_lookup_unlink_of_false_linkto_cbk(call_frame_t *frame, void *cookie, return 0; } -int +static int dht_lookup_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *preparent, @@ -2000,38 +2157,25 @@ dht_lookup_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie, return 0; } -int +static int dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict) { int ret = 0; - xlator_t *this = NULL; - char *linktoskip_key = NULL; - - this = THIS; - GF_VALIDATE_OR_GOTO("dht", this, err); - - if (dht_is_tier_xlator(this)) - linktoskip_key = TIER_SKIP_NON_LINKTO_UNLINK; - else - linktoskip_key = DHT_SKIP_NON_LINKTO_UNLINK; - ret = dict_set_int32(dict, linktoskip_key, 1); + ret = dict_set_int32_sizen(dict, DHT_SKIP_NON_LINKTO_UNLINK, 1); if (ret) - goto err; + return -1; - ret = dict_set_int32(dict, DHT_SKIP_OPEN_FD_UNLINK, 1); + ret = dict_set_int32_sizen(dict, DHT_SKIP_OPEN_FD_UNLINK, 1); if (ret) - goto err; + return -1; return 0; - -err: - return -1; } -int32_t +static int32_t dht_linkfile_create_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, @@ -2105,7 +2249,7 @@ no_linkto: return 0; } -int32_t +static int32_t dht_call_lookup_linkfile_create(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) @@ -2191,7 +2335,7 @@ err: * dht_lookup_everywhere_done takes decision based on any of the above case */ -int +static int dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this) { int ret = 0; @@ -2505,7 +2649,7 @@ unwind_hashed_and_cached: return 0; } -int +static int dht_lookup_everywhere_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xattr, @@ -2841,116 +2985,12 @@ out: return 0; } -int -dht_lookup_directory(call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - int call_cnt = 0; - int i = 0; - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; - int ret = 0; - - GF_VALIDATE_OR_GOTO("dht", frame, out); - GF_VALIDATE_OR_GOTO("dht", this, unwind); - GF_VALIDATE_OR_GOTO("dht", frame->local, unwind); - GF_VALIDATE_OR_GOTO("dht", this->private, unwind); - GF_VALIDATE_OR_GOTO("dht", loc, unwind); - - conf = this->private; - local = frame->local; - - call_cnt = conf->subvolume_cnt; - local->call_cnt = call_cnt; - - local->layout = dht_layout_new(this, conf->subvolume_cnt); - if (!local->layout) { - goto unwind; - } - - if (local->xattr != NULL) { - dict_unref(local->xattr); - local->xattr = NULL; - } - - if (!gf_uuid_is_null(local->gfid)) { - ret = dict_set_gfuuid(local->xattr_req, "gfid-req", local->gfid, true); - if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value:" - " key = gfid-req", - local->loc.path); - } - - for (i = 0; i < call_cnt; i++) { - STACK_WIND_COOKIE( - frame, dht_lookup_dir_cbk, conf->subvolumes[i], conf->subvolumes[i], - conf->subvolumes[i]->fops->lookup, &local->loc, local->xattr_req); - } - return 0; -unwind: - DHT_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL); -out: - return 0; -} - /* Code to get hashed subvol based on inode and loc First it check if loc->parent and loc->path exist then it get hashed subvol based on loc. */ -xlator_t * -dht_inode_get_hashed_subvol(inode_t *inode, xlator_t *this, loc_t *loc) -{ - char *path = NULL; - loc_t populate_loc = { - 0, - }; - char *name = NULL; - xlator_t *hash_subvol = NULL; - - if (!inode) - return hash_subvol; - - if (loc && loc->parent && loc->path) { - if (!loc->name) { - name = strrchr(loc->path, '/'); - if (name) { - loc->name = name + 1; - } else { - goto out; - } - } - hash_subvol = dht_subvol_get_hashed(this, loc); - goto out; - } - - if (!gf_uuid_is_null(inode->gfid)) { - populate_loc.inode = inode_ref(inode); - populate_loc.parent = inode_parent(populate_loc.inode, NULL, NULL); - inode_path(populate_loc.inode, NULL, &path); - - if (!path) - goto out; - - populate_loc.path = path; - if (!populate_loc.name && populate_loc.path) { - name = strrchr(populate_loc.path, '/'); - if (name) { - populate_loc.name = name + 1; - - } else { - goto out; - } - } - hash_subvol = dht_subvol_get_hashed(this, &populate_loc); - } -out: - if (populate_loc.inode) - loc_wipe(&populate_loc); - return hash_subvol; -} - -gf_boolean_t +static gf_boolean_t dht_should_lookup_everywhere(xlator_t *this, dht_conf_t *conf, loc_t *loc) { dht_layout_t *parent_layout = NULL; @@ -3063,10 +3103,12 @@ dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, * or any more call which happens from this 'loc'. */ if (gf_uuid_is_null(local->gfid)) { + /*This is set from the first successful response*/ memcpy(local->gfid, stbuf->ia_gfid, 16); } if (!conf->vch_forced) { + /* Update the commit hash in conf if it is found */ ret = dict_get_uint32(xattr, conf->commithash_xattr_name, &vol_commit_hash); if (ret == 0) { @@ -3076,6 +3118,8 @@ dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, is_dir = check_is_dir(inode, stbuf, xattr); if (is_dir) { + /* A directory is present on all subvols, send the lookup to + * all subvols now */ local->inode = inode_ref(inode); local->xattr = dict_ref(xattr); dht_lookup_directory(frame, this, &local->loc); @@ -3085,7 +3129,9 @@ dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name); if (!is_linkfile) { - /* non-directory and not a linkfile */ + /* non-directory and not a linkto file. This is a data file + * Update the layout to point to the cached subvol + */ ret = dht_layout_preset(this, prev, inode); if (ret < 0) { @@ -3099,6 +3145,9 @@ dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, goto out; } + /* This is a linkto file. Get the value of the target subvol from the + * linkto xattr and lookup there to see if the file exists + */ subvol = dht_linkfile_subvol(this, inode, stbuf, xattr); if (!subvol) { gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO, @@ -3139,7 +3188,7 @@ err: * xlator), if not, request for them. These xattrs are needed for dht dir * self-heal to perform proper self-healing of dirs */ -void +static void dht_check_and_set_acl_xattr_req(xlator_t *this, dict_t *xattr_req) { int ret = 0; @@ -3170,7 +3219,7 @@ dht_check_and_set_acl_xattr_req(xlator_t *this, dict_t *xattr_req) * the mds information : trusted.glusterfs.dht.mds * the acl info: See above */ -int +static int dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req) { int ret = -EINVAL; @@ -3211,50 +3260,109 @@ err: return ret; } -int -dht_set_file_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req) +/* If the hashed subvol is present, send the lookup to only that subvol first. + * If no hashed subvol, send a lookup to all subvols and proceed based on the + * responses. + */ +static int +dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc) { - int ret = -EINVAL; + int ret = -1; dht_conf_t *conf = NULL; + xlator_t *hashed_subvol = NULL; + dht_local_t *local = NULL; + int op_errno = -1; + int call_cnt = 0; + int i = 0; conf = this->private; if (!conf) { + op_errno = EINVAL; goto err; } - if (!xattr_req) { + local = frame->local; + if (!local) { + op_errno = EINVAL; goto err; } - /* Used to check whether this is a linkto file. - */ - ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256); - if (ret < 0) { - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value:key = %s for " - "path %s", - conf->link_xattr_name, loc->path); + /* Since we don't know whether this is a file or a directory, + * request all xattrs*/ + ret = dht_set_file_xattr_req(this, loc, local->xattr_req); + if (ret) { + op_errno = -ret; goto err; } - /* This is used to make sure we don't unlink linkto files - * which are the target of an ongoing file migration. - */ - ret = dict_set_uint32(xattr_req, GLUSTERFS_OPEN_FD_COUNT, 4); + ret = dht_set_dir_xattr_req(this, loc, local->xattr_req); if (ret) { - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value:key = %s for " - "path %s", - GLUSTERFS_OPEN_FD_COUNT, loc->path); + op_errno = -ret; goto err; } - ret = 0; + /* Fuse sets a random value in gfid-req. If the gfid is missing + * on one or more subvols, posix will set the gfid to this value, + * causing GFID mismatches for directories. Remove the value fuse + * has sent before sending the lookup. + */ + ret = dict_get_gfuuid(local->xattr_req, "gfid-req", &local->gfid_req); + if (ret) { + gf_msg_debug(this->name, 0, "%s: No gfid-req available", loc->path); + } else { + dict_del(local->xattr_req, "gfid-req"); + } + /* This should have been set in dht_lookup */ + hashed_subvol = local->hashed_subvol; + + if (!hashed_subvol) { + gf_msg_debug(this->name, 0, + "%s: no subvolume in layout for path, " + "checking on all the subvols to see if " + "it is a directory", + loc->path); + + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; + + /* Allocate a layout. This will be populated and saved in + * the dht inode_ctx on successful lookup + */ + local->layout = dht_layout_new(this, conf->subvolume_cnt); + if (!local->layout) { + op_errno = ENOMEM; + goto err; + } + + gf_msg_debug(this->name, 0, + "%s: Found null hashed subvol. Calling lookup" + " on all nodes.", + loc->path); + + for (i = 0; i < call_cnt; i++) { + STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i], + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, &local->loc, + local->xattr_req); + } + return 0; + } + + /* if the hashed_subvol is non-null, send the lookup there first so + * as to see whether we have a file or a directory */ + gf_msg_debug(this->name, 0, "%s: Calling fresh lookup on %s", loc->path, + hashed_subvol->name); + + STACK_WIND_COOKIE(frame, dht_lookup_cbk, hashed_subvol, hashed_subvol, + hashed_subvol->fops->lookup, loc, local->xattr_req); + return 0; err: - return ret; + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); + return 0; } -int +static int dht_do_revalidate(call_frame_t *frame, xlator_t *this, loc_t *loc) { xlator_t *subvol = NULL; @@ -3329,6 +3437,11 @@ dht_do_revalidate(call_frame_t *frame, xlator_t *this, loc_t *loc) } local->mds_subvol = mds_subvol; local->call_cnt = conf->subvolume_cnt; + + /* local->call_cnt will change as responses are processed. Always use a + * local copy to loop through the STACK_WIND calls + */ + call_cnt = local->call_cnt; for (i = 0; i < call_cnt; i++) { @@ -3362,99 +3475,11 @@ err: return 0; } -int -dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - int ret = -1; - dht_conf_t *conf = NULL; - xlator_t *hashed_subvol = NULL; - dht_local_t *local = NULL; - int op_errno = -1; - int call_cnt = 0; - int i = 0; - - conf = this->private; - if (!conf) { - op_errno = EINVAL; - goto err; - } - - local = frame->local; - if (!local) { - op_errno = EINVAL; - goto err; - } - - /* Since we don't know whether this is a file or a directory, - * request all xattrs*/ - ret = dht_set_file_xattr_req(this, loc, local->xattr_req); - if (ret) { - op_errno = -ret; - goto err; - } - - ret = dht_set_dir_xattr_req(this, loc, local->xattr_req); - if (ret) { - op_errno = -ret; - goto err; - } - - /* Fuse sets a random value in gfid-req. If the gfid is missing - * on one or more subvols, posix will set the gfid to this value, - * causing GFID mismatches for directories. - */ - ret = dict_get_gfuuid(local->xattr_req, "gfid-req", &local->gfid_req); - if (ret) { - gf_msg_debug(this->name, 0, "%s: No gfid-req available", loc->path); - } else { - dict_del(local->xattr_req, "gfid-req"); - } - /* This should have been set in dht_lookup */ - hashed_subvol = local->hashed_subvol; - - if (!hashed_subvol) { - gf_msg_debug(this->name, 0, - "%s: no subvolume in layout for path, " - "checking on all the subvols to see if " - "it is a directory", - loc->path); - - call_cnt = conf->subvolume_cnt; - local->call_cnt = call_cnt; - - local->layout = dht_layout_new(this, conf->subvolume_cnt); - if (!local->layout) { - op_errno = ENOMEM; - goto err; - } - - gf_msg_debug(this->name, 0, - "%s: Found null hashed subvol. Calling lookup" - " on all nodes.", - loc->path); - - for (i = 0; i < call_cnt; i++) { - STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i], - conf->subvolumes[i], - conf->subvolumes[i]->fops->lookup, &local->loc, - local->xattr_req); - } - return 0; - } - - /* if we have the hashed_subvol, send the lookup there first so - * as to see whether we have a file or a directory */ - gf_msg_debug(this->name, 0, "%s: Calling fresh lookup on %s", loc->path, - hashed_subvol->name); - - STACK_WIND_COOKIE(frame, dht_lookup_cbk, hashed_subvol, hashed_subvol, - hashed_subvol->fops->lookup, loc, local->xattr_req); - return 0; -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); - return 0; -} +/* Depending on the input, decide if this is a: + * fresh-lookup: loc->name is provided but no dht inode ctx + * revalidation: loc->name is provided, dht inode ctx is present + * discover: gfid based nameless lookup. + */ int dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) @@ -3508,6 +3533,10 @@ dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) /* Nameless lookup */ + /* This is usually sent by NFS. Lookups are done based on the gfid and + * no name information is available. Without the name, dht cannot calculate + * the hash and has to send a lookup to all subvols. + */ if (gf_uuid_is_null(loc->pargfid) && !gf_uuid_is_null(loc->gfid) && !__is_root_gfid(loc->inode->gfid)) { local->cached_subvol = NULL; @@ -3516,6 +3545,9 @@ dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) } if (loc_is_root(loc)) { + /* Request the DHT commit hash xattr (trusted.glusterfs.dht.commithash) + * set on the brick root. + */ ret = dict_set_uint32(local->xattr_req, conf->commithash_xattr_name, sizeof(uint32_t)); } @@ -3524,12 +3556,14 @@ dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) hashed_subvol = dht_subvol_get_hashed(this, loc); local->hashed_subvol = hashed_subvol; - /* The entry has been looked up before and has an inode_ctx set - */ if (is_revalidate(loc)) { + /* The entry has been looked up before and has a dht inode_ctx + */ dht_do_revalidate(frame, this, loc); return 0; } else { + /* Entry has not been looked up before + */ dht_do_fresh_lookup(frame, this, loc); return 0; } @@ -3541,7 +3575,7 @@ err: return 0; } -int +static int dht_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) @@ -3575,7 +3609,7 @@ post_unlock: return 0; } -int +static int dht_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) @@ -3666,7 +3700,7 @@ dht_fix_layout_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, return 0; } -int +static int dht_err_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) { @@ -3738,7 +3772,7 @@ dht_dict_set_array(dict_t *dict, char *key, int32_t value[], int32_t size) return ret; } -int +static int dht_common_mds_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) @@ -3781,7 +3815,7 @@ out: /* Code to wind a xattrop call to add 1 on current mds internal xattr value */ -int +static int dht_setxattr_non_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) { @@ -3894,7 +3928,7 @@ just_return: return 0; } -int +static int dht_setxattr_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) { @@ -3981,7 +4015,7 @@ just_return: return 0; } -int +static int dht_xattrop_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *dict, dict_t *xdata) { @@ -4094,7 +4128,7 @@ dht_fill_pathinfo_xattr(xlator_t *this, dht_local_t *local, char *xattr_buf, } } -int +static int dht_vgetxattr_alloc_and_fill(dht_local_t *local, dict_t *xattr, xlator_t *this, int op_errno) { @@ -4143,7 +4177,7 @@ out: return ret; } -int +static int dht_vgetxattr_fill_and_set(dht_local_t *local, dict_t **dict, xlator_t *this, gf_boolean_t flag) { @@ -4199,7 +4233,7 @@ out: return ret; } -int +static int dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) @@ -4241,8 +4275,11 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local->op_ret = -1; local->op_errno = op_errno; UNLOCK(&frame->lock); - gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED, - "getxattr err for dir"); + if (op_errno == ENODATA) + gf_msg_debug(this->name, 0, "failed to get node-uuid"); + else + gf_msg(this->name, GF_LOG_ERROR, op_errno, + DHT_MSG_GET_XATTR_FAILED, "failed to get node-uuid"); goto post_unlock; } @@ -4264,6 +4301,8 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, index = conf->local_subvols_cnt; uuid_list_copy = gf_strdup(uuid_list); + if (!uuid_list_copy) + goto unlock; for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str; uuid_str = next_uuid_str) { @@ -4353,7 +4392,7 @@ out: return 0; } -int +static int dht_vgetxattr_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) { @@ -4419,7 +4458,7 @@ out: return 0; } -int +static int dht_vgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) { @@ -4465,7 +4504,7 @@ cleanup: return 0; } -int +static int dht_linkinfo_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) @@ -4487,7 +4526,7 @@ dht_linkinfo_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, return 0; } -int +static int dht_mds_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) { @@ -4528,6 +4567,7 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int this_call_cnt = 0; dht_local_t *local = NULL; dht_conf_t *conf = NULL; + int ret = 0; VALIDATE_OR_GOTO(frame, err); VALIDATE_OR_GOTO(frame->local, err); @@ -4536,6 +4576,13 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, conf = this->private; local = frame->local; + if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto err; + return 0; + } + LOCK(&frame->lock); { if (!xattr || (op_ret == -1)) { @@ -4546,18 +4593,8 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, dict_del(xattr, conf->xattr_name); dict_del(xattr, conf->mds_xattr_key); - /* filter out following two xattrs that need not - * be visible on the mount point for geo-rep - - * trusted.tier.fix.layout.complete and - * trusted.tier.tier-dht.commithash - */ - dict_del(xattr, conf->commithash_xattr_name); - if (frame->root->pid >= 0 && dht_is_tier_xlator(this)) { - dict_del(xattr, GF_XATTR_TIER_LAYOUT_FIXED_KEY); - } - if (frame->root->pid >= 0) { GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr); GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr); @@ -4598,7 +4635,7 @@ err: return 0; } -int32_t +static int32_t dht_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, dict_t *dict, dict_t *xdata) { @@ -4606,7 +4643,7 @@ dht_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, dict_t *dict, return 0; } -int +static int dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) @@ -4707,7 +4744,7 @@ post_unlock: return 0; } -int +static int dht_getxattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, dict_t *xdata) { @@ -4734,7 +4771,7 @@ dht_getxattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc, return 0; } -int +static int dht_marker_populate_args(call_frame_t *frame, int type, int *gauge, xlator_t **subvols) { @@ -4751,8 +4788,8 @@ dht_marker_populate_args(call_frame_t *frame, int type, int *gauge, return layout->cnt; } -int -dht_is_debug_xattr_key(char **array, char *key) +static int +dht_is_debug_xattr_key(const char **array, char *key) { int i = 0; @@ -4766,7 +4803,7 @@ dht_is_debug_xattr_key(char **array, char *key) /* Note we already have frame->local initialised here*/ -int +static int dht_handle_debug_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key) { @@ -4778,10 +4815,6 @@ dht_handle_debug_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name = NULL; local = frame->local; - if (!key) { - op_errno = EINVAL; - goto out; - } if (dht_is_debug_xattr_key(dht_dbg_vxattrs, (char *)key) == -1) { goto out; @@ -4835,6 +4868,60 @@ out: return 0; } +/* Virtual Xattr which returns 1 if all subvols are up, + else returns 0. Geo-rep then uses this virtual xattr + after a fresh mount and starts the I/O. +*/ + +enum dht_vxattr_subvol { + DHT_VXATTR_SUBVOLS_UP = 1, + DHT_VXATTR_SUBVOLS_DOWN = 0, +}; + +int +dht_vgetxattr_subvol_status(call_frame_t *frame, xlator_t *this, + const char *key) +{ + dht_local_t *local = NULL; + int ret = -1; + int op_errno = ENODATA; + int value = DHT_VXATTR_SUBVOLS_UP; + int i = 0; + dht_conf_t *conf = NULL; + + conf = this->private; + local = frame->local; + + if (!key) { + op_errno = EINVAL; + goto out; + } + local->xattr = dict_new(); + if (!local->xattr) { + op_errno = ENOMEM; + goto out; + } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (!conf->subvolume_status[i]) { + value = DHT_VXATTR_SUBVOLS_DOWN; + gf_msg_debug(this->name, 0, "subvol %s is down ", + conf->subvolumes[i]->name); + break; + } + } + ret = dict_set_int8(local->xattr, (char *)key, value); + if (ret < 0) { + op_errno = -ret; + ret = -1; + goto out; + } + ret = 0; + +out: + DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL); + return 0; +} + int dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, dict_t *xdata) @@ -4892,6 +4979,11 @@ dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, goto err; } + if (strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) { + dht_vgetxattr_subvol_status(frame, this, key); + return 0; + } + /* skip over code which is irrelevant if !DHT_IS_DIR(layout) */ if (!DHT_IS_DIR(layout)) goto no_dht_is_dir; @@ -5187,6 +5279,53 @@ err: return 0; } +static int +dht_setxattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +{ + dht_local_t *local = NULL; + int op_errno = EINVAL; + + if (!frame || !frame->local) + goto err; + + local = frame->local; + op_errno = local->op_errno; + + if (we_are_not_migrating(ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. + */ + DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, + local->rebalance.xdata); + return 0; + } + + if (subvol == NULL) + goto err; + + local->call_cnt = 2; /* This is the second attempt */ + + if (local->fop == GF_FOP_SETXATTR) { + STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol, + subvol->fops->setxattr, &local->loc, + local->rebalance.xattr, local->rebalance.flags, + local->xattr_req); + } else { + STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol, + subvol->fops->fsetxattr, local->fd, + local->rebalance.xattr, local->rebalance.flags, + local->xattr_req); + } + + return 0; + +err: + DHT_STACK_UNWIND(setxattr, frame, (local ? local->op_ret : -1), op_errno, + NULL); + return 0; +} + int dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) @@ -5203,8 +5342,8 @@ dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local->op_errno = op_errno; - if ((local->fop == GF_FOP_FSETXATTR) && op_ret == -1 && - (op_errno == EBADF) && !(local->fd_checked)) { + if ((local->fop == GF_FOP_FSETXATTR) && + dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { ret = dht_check_and_open_fd_on_subvol(this, frame); if (ret) goto out; @@ -5277,7 +5416,7 @@ dht_is_user_xattr(dict_t *this, char *key, data_t *value, void *data) /* Common code to wind a (f)(set|remove)xattr call to set xattr on directory */ -int +static int dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xattr, int flags, dict_t *xdata, int *op_errno) @@ -5294,11 +5433,13 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc, int call_cnt = 0; dht_local_t *local = NULL; char gfid_local[GF_UUID_BUF_SIZE] = {0}; + char **xattrs_to_heal; conf = this->private; local = frame->local; call_cnt = conf->subvolume_cnt; local->flags = flags; + xattrs_to_heal = get_xattrs_to_heal(); if (!gf_uuid_is_null(local->gfid)) { gf_uuid_unparse(local->gfid, gfid_local); @@ -5530,7 +5671,7 @@ err: return 0; } -int +static int dht_checking_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) @@ -5569,54 +5710,7 @@ out: return 0; } -int -dht_setxattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) -{ - dht_local_t *local = NULL; - int op_errno = EINVAL; - - if (!frame || !frame->local) - goto err; - - local = frame->local; - op_errno = local->op_errno; - - if (we_are_not_migrating(ret)) { - /* This dht xlator is not migrating the file. Unwind and - * pass on the original mode bits so the higher DHT layer - * can handle this. - */ - DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, - local->rebalance.xdata); - return 0; - } - - if (subvol == NULL) - goto err; - - local->call_cnt = 2; /* This is the second attempt */ - - if (local->fop == GF_FOP_SETXATTR) { - STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol, - subvol->fops->setxattr, &local->loc, - local->rebalance.xattr, local->rebalance.flags, - local->xattr_req); - } else { - STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol, - subvol->fops->fsetxattr, local->fd, - local->rebalance.xattr, local->rebalance.flags, - local->xattr_req); - } - - return 0; - -err: - DHT_STACK_UNWIND(setxattr, frame, (local ? local->op_ret : -1), op_errno, - NULL); - return 0; -} - -int +static int dht_nuke_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) @@ -5625,7 +5719,7 @@ dht_nuke_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, return 0; } -int +static int dht_nuke_dir(call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *tmp) { if (!IA_ISDIR(loc->inode->ia_type)) { @@ -5778,22 +5872,7 @@ dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, if (local->rebalance.target_node) { local->flags = forced_rebalance; - /* Flag to suggest its a tiering migration - * The reason for this dic key-value is that - * promotions and demotions are multithreaded - * so the original frame from gf_defrag_start() - * is not carried. A new frame will be created when - * we do syncop_setxattr(). This does not have the - * frame->root->pid of the original frame. So we pass - * this dic key-value when we do syncop_setxattr() to do - * data migration and set the frame->root->pid to - * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before - * calling dht_start_rebalance_task() */ - tmp = dict_get(xattr, TIERING_MIGRATION_KEY); - if (tmp) - frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG; - else - frame->root->pid = GF_CLIENT_PID_DEFRAG; + frame->root->pid = GF_CLIENT_PID_DEFRAG; ret = dht_start_rebalance_task(this, frame); if (!ret) @@ -5912,6 +5991,50 @@ err: return 0; } +static int +dht_removexattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +{ + dht_local_t *local = NULL; + int op_errno = EINVAL; + + if (!frame || !frame->local) + goto err; + + local = frame->local; + op_errno = local->op_errno; + + local->call_cnt = 2; /* This is the second attempt */ + + if (we_are_not_migrating(ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. + */ + DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno, + local->rebalance.xdata); + return 0; + } + + if (subvol == NULL) + goto err; + + if (local->fop == GF_FOP_REMOVEXATTR) { + STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol, + subvol->fops->removexattr, &local->loc, local->key, + local->xattr_req); + } else { + STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol, + subvol->fops->fremovexattr, local->fd, local->key, + local->xattr_req); + } + + return 0; + +err: + DHT_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL); + return 0; +} + int dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) @@ -5928,8 +6051,8 @@ dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local->op_errno = op_errno; - if ((local->fop == GF_FOP_FREMOVEXATTR) && (op_ret == -1) && - (op_errno == EBADF) && !(local->fd_checked)) { + if ((local->fop == GF_FOP_FREMOVEXATTR) && + dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { ret = dht_check_and_open_fd_on_subvol(this, frame); if (ret) goto out; @@ -5989,84 +6112,6 @@ out: } int -dht_removexattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) -{ - dht_local_t *local = NULL; - int op_errno = EINVAL; - - if (!frame || !frame->local) - goto err; - - local = frame->local; - op_errno = local->op_errno; - - local->call_cnt = 2; /* This is the second attempt */ - - if (we_are_not_migrating(ret)) { - /* This dht xlator is not migrating the file. Unwind and - * pass on the original mode bits so the higher DHT layer - * can handle this. - */ - DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno, - local->rebalance.xdata); - return 0; - } - - if (subvol == NULL) - goto err; - - if (local->fop == GF_FOP_REMOVEXATTR) { - STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol, - subvol->fops->removexattr, &local->loc, local->key, - local->xattr_req); - } else { - STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol, - subvol->fops->fremovexattr, local->fd, local->key, - local->xattr_req); - } - - return 0; - -err: - DHT_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL); - return 0; -} - -int -dht_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) -{ - dht_local_t *local = NULL; - int this_call_cnt = 0; - xlator_t *prev = NULL; - - local = frame->local; - prev = cookie; - - LOCK(&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - UNLOCK(&frame->lock); - gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", - prev->name); - goto post_unlock; - } - - local->op_ret = 0; - } - UNLOCK(&frame->lock); -post_unlock: - this_call_cnt = dht_frame_return(frame); - if (is_last_call(this_call_cnt)) { - DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno, - NULL); - } - - return 0; -} - -int dht_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, dict_t *xdata) { @@ -6264,7 +6309,7 @@ post_unlock: /* * dht_normalize_stats - */ -void +static void dht_normalize_stats(struct statvfs *buf, unsigned long bsize, unsigned long frsize) { @@ -6283,7 +6328,7 @@ dht_normalize_stats(struct statvfs *buf, unsigned long bsize, } } -int +static int dht_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct statvfs *statvfs, dict_t *xdata) { @@ -6393,9 +6438,7 @@ dht_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) int i = -1; inode_t *inode = NULL; inode_table_t *itable = NULL; - uuid_t root_gfid = { - 0, - }; + static uuid_t root_gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; loc_t newloc = { 0, }; @@ -6421,7 +6464,6 @@ dht_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) } loc = &local->loc2; - root_gfid[15] = 1; inode = inode_find(itable, root_gfid); if (!inode) { @@ -6539,7 +6581,7 @@ err: this information layout can be constructed and set in inode. */ -void +static void dht_populate_inode_for_dentry(xlator_t *this, xlator_t *subvol, gf_dirent_t *entry, gf_dirent_t *orig_entry) { @@ -6586,7 +6628,7 @@ out: /* Posix returns op_errno = ENOENT to indicate that there are no more * entries */ -int +static int dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) { @@ -6642,10 +6684,9 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, layout = local->layout; - /* We have seen crashes in while running "rm -rf" on tier volumes - when the layout was NULL on the hot tier. This will skip the - entries on the subvol without a layout, hence preventing the crash - but rmdir might fail with "directory not empty" errors*/ + /* This will skip the entries on the subvol without a layout, + * hence preventing the crash but rmdir might fail with + * "directory not empty" errors*/ if (layout == NULL) goto done; @@ -6663,13 +6704,12 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, orig_entry->d_name, orig_entry->d_type); if (IA_ISINVAL(orig_entry->d_stat.ia_type)) { - /*stat failed somewhere- ignore this entry*/ - gf_msg_debug(this->name, EINVAL, - "Invalid stat, ignoring entry " - "%s gfid %s", + /*stat failed somewhere- display this entry but the data may + * be inaccurate. + */ + gf_msg_debug(this->name, EINVAL, "Invalid stat for %s (gfid %s)", orig_entry->d_name, uuid_utoa(orig_entry->d_stat.ia_gfid)); - continue; } if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict, @@ -6878,7 +6918,7 @@ unwind: return 0; } -int +static int dht_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) { @@ -7001,7 +7041,7 @@ unwind: return 0; } -int +static int dht_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t yoff, int whichop, dict_t *dict) { @@ -7125,7 +7165,7 @@ dht_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, return 0; } -int +static int dht_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) { @@ -7255,7 +7295,7 @@ out: return 0; } -int +static int dht_mknod_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, @@ -7306,7 +7346,7 @@ err: return 0; } -int +static int dht_mknod_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, xlator_t *subvol, loc_t *loc, dev_t rdev, mode_t mode, mode_t umask, dict_t *params) @@ -7352,7 +7392,7 @@ out: return 0; } -int32_t +static int32_t dht_mknod_do(call_frame_t *frame) { dht_local_t *local = NULL; @@ -7402,7 +7442,7 @@ err: return 0; } -int32_t +static int32_t dht_mknod_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { @@ -7410,7 +7450,7 @@ dht_mknod_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, return 0; } -int32_t +static int32_t dht_mknod_finish(call_frame_t *frame, xlator_t *this, int op_ret, int invoke_cbk) { @@ -7462,7 +7502,7 @@ done: return 0; } -int32_t +static int32_t dht_mknod_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { @@ -7499,7 +7539,7 @@ err: return 0; } -int32_t +static int32_t dht_mknod_lock(call_frame_t *frame, xlator_t *subvol) { dht_local_t *local = NULL; @@ -7544,7 +7584,7 @@ err: return -1; } -int +static int dht_refresh_parent_layout_resume(call_frame_t *frame, xlator_t *this, int ret, int invoke_cbk) { @@ -7574,7 +7614,7 @@ dht_refresh_parent_layout_resume(call_frame_t *frame, xlator_t *this, int ret, return 0; } -int +static int dht_refresh_parent_layout_done(call_frame_t *frame) { dht_local_t *local = NULL; @@ -7595,7 +7635,7 @@ resume: return 0; } -int +static int dht_handle_parent_layout_change(xlator_t *this, call_stub_t *stub) { call_frame_t *refresh_frame = NULL, *frame = NULL; @@ -7630,7 +7670,7 @@ dht_handle_parent_layout_change(xlator_t *this, call_stub_t *stub) return 0; } -int32_t +static int32_t dht_call_mkdir_stub(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { @@ -7653,7 +7693,7 @@ dht_call_mkdir_stub(call_frame_t *frame, void *cookie, xlator_t *this, return 0; } -int32_t +static int32_t dht_guard_parent_layout_and_namespace(xlator_t *subvol, call_stub_t *stub) { dht_local_t *local = NULL; @@ -7964,7 +8004,58 @@ err: return 0; } -int +static int +dht_remove_stale_linkto_cbk(int ret, call_frame_t *sync_frame, void *data) +{ + DHT_STACK_DESTROY(sync_frame); + return 0; +} + +static int +dht_remove_stale_linkto(void *data) +{ + call_frame_t *frame = NULL; + dht_local_t *local = NULL; + xlator_t *this = NULL; + dict_t *xdata_in = NULL; + int ret = 0; + + GF_VALIDATE_OR_GOTO("dht", data, out); + + frame = data; + local = frame->local; + this = frame->this; + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", local, out); + GF_VALIDATE_OR_GOTO("dht", local->link_subvol, out); + + xdata_in = dict_new(); + if (!xdata_in) + goto out; + + ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(xdata_in); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, 0, + "Failed to set keys for stale linkto" + "deletion on path %s", + local->loc.path); + goto out; + } + + ret = syncop_unlink(local->link_subvol, &local->loc, xdata_in, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, 0, + "Removal of linkto failed" + " on path %s at subvol %s", + local->loc.path, local->link_subvol->name); + } +out: + if (xdata_in) + dict_unref(xdata_in); + return ret; +} + +static int dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) @@ -8080,7 +8171,7 @@ out: return 0; } -int +static int dht_link2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; @@ -8136,7 +8227,7 @@ err: return 0; } -int +static int dht_link_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, @@ -8239,6 +8330,11 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, xlator_t *prev = NULL; int ret = -1; dht_local_t *local = NULL; + gf_boolean_t parent_layout_changed = _gf_false; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + xlator_t *subvol = NULL; + + local = frame->local; local = frame->local; if (!local) { @@ -8247,8 +8343,69 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, goto out; } - if (op_ret == -1) + if (op_ret == -1) { + local->op_errno = op_errno; + parent_layout_changed = (xdata && + dict_get(xdata, GF_PREOP_CHECK_FAILED)) + ? _gf_true + : _gf_false; + + if (parent_layout_changed) { + if (local && local->lock[0].layout.parent_layout.locks) { + /* Returning failure as the layout could not be fixed even under + * the lock */ + goto out; + } + + gf_uuid_unparse(local->loc.parent->gfid, pgfid); + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED, + "create (%s/%s) (path: %s): parent layout " + "changed. Attempting a layout refresh and then a " + "retry", + pgfid, local->loc.name, local->loc.path); + + /* + dht_refresh_layout needs directory info in local->loc.Hence, + storing the parent_loc in local->loc and storing the create + context in local->loc2. We will restore this information in + dht_creation_do. + */ + + loc_wipe(&local->loc2); + + ret = loc_copy(&local->loc2, &local->loc); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "loc_copy failed %s", local->loc.path); + + goto out; + } + + loc_wipe(&local->loc); + + ret = dht_build_parent_loc(this, &local->loc, &local->loc2, + &op_errno); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED, + "parent loc build failed"); + goto out; + } + + subvol = dht_subvol_get_hashed(this, &local->loc2); + + ret = dht_create_lock(frame, subvol); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR, + "locking parent failed"); + goto out; + } + + return 0; + } + goto out; + } prev = cookie; @@ -8303,7 +8460,7 @@ out: return 0; } -int +static int dht_create_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *stbuf, @@ -8354,7 +8511,7 @@ err: return 0; } -int +static int dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd, @@ -8369,6 +8526,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, subvol->name); + dht_set_parent_layout_in_dict(loc, this, local); + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, subvol->fops->create, loc, flags, mode, umask, fd, params); @@ -8377,10 +8536,6 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, avail_subvol = dht_free_disk_available_subvol(this, subvol, local); if (avail_subvol != subvol) { - local->params = dict_ref(params); - local->flags = flags; - local->mode = mode; - local->umask = umask; local->cached_subvol = avail_subvol; local->hashed_subvol = subvol; @@ -8396,6 +8551,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, subvol->name); + dht_set_parent_layout_in_dict(loc, this, local); + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, subvol->fops->create, loc, flags, mode, umask, fd, params); @@ -8464,7 +8621,7 @@ out: return ret; } -int32_t +static int32_t dht_create_do(call_frame_t *frame) { dht_local_t *local = NULL; @@ -8514,7 +8671,7 @@ err: return 0; } -int32_t +static int32_t dht_create_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { @@ -8522,7 +8679,7 @@ dht_create_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, return 0; } -int32_t +static int32_t dht_create_finish(call_frame_t *frame, xlator_t *this, int op_ret, int invoke_cbk) { @@ -8574,7 +8731,7 @@ done: return 0; } -int32_t +static int32_t dht_create_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { @@ -8657,6 +8814,60 @@ err: } int +dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local) +{ + dht_conf_t *conf = this->private; + dht_layout_t *parent_layout = NULL; + int *parent_disk_layout = NULL; + xlator_t *hashed_subvol = NULL; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + int ret = 0; + + gf_uuid_unparse(loc->parent->gfid, pgfid); + + parent_layout = dht_layout_get(this, loc->parent); + hashed_subvol = dht_subvol_get_hashed(this, loc); + + ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol, + &parent_disk_layout); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "%s (%s/%s) (path: %s): " + "extracting in-memory layout of parent failed. ", + gf_fop_list[local->fop], pgfid, loc->name, loc->path); + goto err; + } + + ret = dict_set_str_sizen(local->params, GF_PREOP_PARENT_KEY, + conf->xattr_name); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "%s (%s/%s) (path: %s): " + "setting %s key in params dictionary failed. ", + gf_fop_list[local->fop], pgfid, loc->name, loc->path, + GF_PREOP_PARENT_KEY); + goto err; + } + + ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout, + 4 * 4); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "%s (%s/%s) (path: %s): " + "setting parent-layout in params dictionary failed. ", + gf_fop_list[local->fop], pgfid, loc->name, loc->path); + goto err; + } + +err: + dht_layout_unref(this, parent_layout); + return ret; +} + +int dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *params) { @@ -8682,6 +8893,11 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, goto err; } + local->params = dict_ref(params); + local->flags = flags; + local->mode = mode; + local->umask = umask; + if (dht_filter_loc_subvol_key(this, loc, &local->loc, &subvol)) { gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO, "creating %s on %s (got create on %s)", local->loc.path, @@ -8697,10 +8913,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, if (hashed_subvol && (hashed_subvol != subvol)) { /* Create the linkto file and then the data file */ - local->params = dict_ref(params); - local->flags = flags; - local->mode = mode; - local->umask = umask; local->cached_subvol = subvol; local->hashed_subvol = hashed_subvol; @@ -8713,6 +8925,9 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, * file as we expect a lookup everywhere if there are problems * with the parent layout */ + + dht_set_parent_layout_in_dict(loc, this, local); + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, subvol->fops->create, &local->loc, flags, mode, umask, fd, params); @@ -8764,11 +8979,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, goto err; } - local->params = dict_ref(params); - local->flags = flags; - local->mode = mode; - local->umask = umask; - loc_wipe(&local->loc); ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno); @@ -8806,7 +9016,7 @@ err: return 0; } -int +static int dht_mkdir_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { @@ -8840,7 +9050,7 @@ dht_mkdir_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this, return 0; } -int +static int dht_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) @@ -8908,13 +9118,13 @@ unlock: return 0; } -int +static int dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, struct iatt *postparent, dict_t *xdata); -int +static int dht_mkdir_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, mode_t umask, dict_t *params) { @@ -9037,7 +9247,7 @@ err: return 0; } -int +static int dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, @@ -9054,8 +9264,6 @@ dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this, gf_boolean_t parent_layout_changed = _gf_false; call_stub_t *stub = NULL; - VALIDATE_OR_GOTO(this->private, err); - local = frame->local; prev = cookie; layout = local->layout; @@ -9170,7 +9378,7 @@ err: return 0; } -int +static int dht_mkdir_guard_parent_layout_cbk(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, mode_t umask, dict_t *params) @@ -9321,7 +9529,7 @@ err: return 0; } -int +static int dht_rmdir_selfheal_cbk(call_frame_t *heal_frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) { @@ -9343,7 +9551,7 @@ dht_rmdir_selfheal_cbk(call_frame_t *heal_frame, void *cookie, xlator_t *this, return 0; } -int +static int dht_rmdir_hashed_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) @@ -9446,7 +9654,63 @@ err: return 0; } -int +static int +dht_rmdir_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + DHT_STACK_DESTROY(frame); + return 0; +} + +static int +dht_rmdir_unlock(call_frame_t *frame, xlator_t *this) +{ + dht_local_t *local = NULL, *lock_local = NULL; + call_frame_t *lock_frame = NULL; + int lock_count = 0; + + local = frame->local; + + /* Unlock entrylk */ + dht_unlock_entrylk_wrapper(frame, &local->lock[0].ns.directory_ns); + + /* Unlock inodelk */ + lock_count = dht_lock_count(local->lock[0].ns.parent_layout.locks, + local->lock[0].ns.parent_layout.lk_count); + + if (lock_count == 0) + goto done; + + lock_frame = copy_frame(frame); + if (lock_frame == NULL) + goto done; + + lock_local = dht_local_init(lock_frame, &local->loc, NULL, + lock_frame->root->op); + if (lock_local == NULL) + goto done; + + lock_local->lock[0].ns.parent_layout.locks = local->lock[0] + .ns.parent_layout.locks; + lock_local->lock[0] + .ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count; + + local->lock[0].ns.parent_layout.locks = NULL; + local->lock[0].ns.parent_layout.lk_count = 0; + dht_unlock_inodelk(lock_frame, lock_local->lock[0].ns.parent_layout.locks, + lock_local->lock[0].ns.parent_layout.lk_count, + dht_rmdir_unlock_cbk); + lock_frame = NULL; + +done: + if (lock_frame != NULL) { + DHT_STACK_DESTROY(lock_frame); + } + + return 0; +} + +static int dht_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) @@ -9581,63 +9845,7 @@ err: return 0; } -int -dht_rmdir_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - DHT_STACK_DESTROY(frame); - return 0; -} - -int -dht_rmdir_unlock(call_frame_t *frame, xlator_t *this) -{ - dht_local_t *local = NULL, *lock_local = NULL; - call_frame_t *lock_frame = NULL; - int lock_count = 0; - - local = frame->local; - - /* Unlock entrylk */ - dht_unlock_entrylk_wrapper(frame, &local->lock[0].ns.directory_ns); - - /* Unlock inodelk */ - lock_count = dht_lock_count(local->lock[0].ns.parent_layout.locks, - local->lock[0].ns.parent_layout.lk_count); - - if (lock_count == 0) - goto done; - - lock_frame = copy_frame(frame); - if (lock_frame == NULL) - goto done; - - lock_local = dht_local_init(lock_frame, &local->loc, NULL, - lock_frame->root->op); - if (lock_local == NULL) - goto done; - - lock_local->lock[0].ns.parent_layout.locks = local->lock[0] - .ns.parent_layout.locks; - lock_local->lock[0] - .ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count; - - local->lock[0].ns.parent_layout.locks = NULL; - local->lock[0].ns.parent_layout.lk_count = 0; - dht_unlock_inodelk(lock_frame, lock_local->lock[0].ns.parent_layout.locks, - lock_local->lock[0].ns.parent_layout.lk_count, - dht_rmdir_unlock_cbk); - lock_frame = NULL; - -done: - if (lock_frame != NULL) { - DHT_STACK_DESTROY(lock_frame); - } - - return 0; -} - -int +static int dht_rmdir_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { @@ -9646,8 +9854,6 @@ dht_rmdir_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int i = 0; xlator_t *hashed_subvol; - VALIDATE_OR_GOTO(this->private, err); - conf = this->private; local = frame->local; @@ -9680,7 +9886,7 @@ err: return 0; } -int +static int dht_rmdir_do(call_frame_t *frame, xlator_t *this) { dht_local_t *local = NULL; @@ -9744,7 +9950,63 @@ err: return 0; } -int +static void +dht_rmdir_readdirp_done(call_frame_t *readdirp_frame, xlator_t *this) +{ + call_frame_t *main_frame = NULL; + dht_local_t *main_local = NULL; + dht_local_t *local = NULL; + int this_call_cnt = 0; + + local = readdirp_frame->local; + main_frame = local->main_frame; + main_local = main_frame->local; + + /* At least one readdirp failed. + * This is a bit hit or miss - if readdirp failed on more than + * one subvol, we don't know which error is returned. + */ + if (local->op_ret == -1) { + main_local->op_ret = local->op_ret; + main_local->op_errno = local->op_errno; + } + + this_call_cnt = dht_frame_return(main_frame); + + if (is_last_call(this_call_cnt)) + dht_rmdir_do(main_frame, this); + + DHT_STACK_DESTROY(readdirp_frame); +} + +/* Keep sending readdirp on the subvol until it returns no more entries + * It is possible that not all entries will fit in a single readdirp in + * which case the rmdir will keep failing with ENOTEMPTY + */ + +static int +dht_rmdir_readdirp_do(call_frame_t *readdirp_frame, xlator_t *this) +{ + dht_local_t *local = NULL; + + local = readdirp_frame->local; + + if (local->op_ret == -1) { + /* there is no point doing another readdirp on this + * subvol . */ + dht_rmdir_readdirp_done(readdirp_frame, this); + return 0; + } + + STACK_WIND_COOKIE(readdirp_frame, dht_rmdir_readdirp_cbk, + local->hashed_subvol, local->hashed_subvol, + local->hashed_subvol->fops->readdirp, local->fd, 4096, 0, + local->xattr); + + return 0; +} + +static int dht_rmdir_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) @@ -9788,7 +10050,7 @@ dht_rmdir_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, return 0; } -int +static int dht_rmdir_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xattr, struct iatt *parent) @@ -9843,7 +10105,7 @@ err: return 0; } -int +static int dht_rmdir_cached_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xattr, @@ -9923,7 +10185,7 @@ err: return 0; } -int +static int dht_rmdir_is_subvol_empty(call_frame_t *frame, xlator_t *this, gf_dirent_t *entries, xlator_t *src) { @@ -10084,36 +10346,7 @@ err: * No more entries on this subvol. Proceed to the actual rmdir operation. */ -void -dht_rmdir_readdirp_done(call_frame_t *readdirp_frame, xlator_t *this) -{ - call_frame_t *main_frame = NULL; - dht_local_t *main_local = NULL; - dht_local_t *local = NULL; - int this_call_cnt = 0; - - local = readdirp_frame->local; - main_frame = local->main_frame; - main_local = main_frame->local; - - /* At least one readdirp failed. - * This is a bit hit or miss - if readdirp failed on more than - * one subvol, we don't know which error is returned. - */ - if (local->op_ret == -1) { - main_local->op_ret = local->op_ret; - main_local->op_errno = local->op_errno; - } - - this_call_cnt = dht_frame_return(main_frame); - - if (is_last_call(this_call_cnt)) - dht_rmdir_do(main_frame, this); - - DHT_STACK_DESTROY(readdirp_frame); -} - -int +static int dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata) @@ -10163,34 +10396,7 @@ dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, return 0; } -/* Keep sending readdirp on the subvol until it returns no more entries - * It is possible that not all entries will fit in a single readdirp in - * which case the rmdir will keep failing with ENOTEMPTY - */ - -int -dht_rmdir_readdirp_do(call_frame_t *readdirp_frame, xlator_t *this) -{ - dht_local_t *local = NULL; - - local = readdirp_frame->local; - - if (local->op_ret == -1) { - /* there is no point doing another readdirp on this - * subvol . */ - dht_rmdir_readdirp_done(readdirp_frame, this); - return 0; - } - - STACK_WIND_COOKIE(readdirp_frame, dht_rmdir_readdirp_cbk, - local->hashed_subvol, local->hashed_subvol, - local->hashed_subvol->fops->readdirp, local->fd, 4096, 0, - local->xattr); - - return 0; -} - -int +static int dht_rmdir_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, fd_t *fd, dict_t *xdata) { @@ -10383,7 +10589,7 @@ err: return 0; } -int +static int dht_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) @@ -10443,7 +10649,7 @@ err: return 0; } -int +static int dht_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) @@ -10490,7 +10696,7 @@ err: return 0; } -int32_t +static int32_t dht_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { @@ -10607,23 +10813,17 @@ dht_notify(xlator_t *this, int event, void *data, ...) int had_heard_from_all = 0; int have_heard_from_all = 0; - struct timeval time = { - 0, - }; gf_defrag_info_t *defrag = NULL; dict_t *dict = NULL; gf_defrag_type cmd = 0; dict_t *output = NULL; va_list ap; - dht_methods_t *methods = NULL; struct gf_upcall *up_data = NULL; struct gf_upcall_cache_invalidation *up_ci = NULL; conf = this->private; GF_VALIDATE_OR_GOTO(this->name, conf, out); - methods = &(conf->methods); - /* had all subvolumes reported status once till now? */ had_heard_from_all = 1; for (i = 0; i < conf->subvolume_cnt; i++) { @@ -10653,12 +10853,11 @@ dht_notify(xlator_t *this, int event, void *data, ...) break; } - gettimeofday(&time, NULL); LOCK(&conf->subvolume_lock); { conf->subvolume_status[cnt] = 1; conf->last_event[cnt] = event; - conf->subvol_up_time[cnt] = time.tv_sec; + conf->subvol_up_time[cnt] = gf_time(); } UNLOCK(&conf->subvolume_lock); @@ -10766,21 +10965,13 @@ dht_notify(xlator_t *this, int event, void *data, ...) if (defrag->is_exiting) goto unlock; if ((cmd == GF_DEFRAG_CMD_STATUS) || - (cmd == GF_DEFRAG_CMD_STATUS_TIER) || (cmd == GF_DEFRAG_CMD_DETACH_STATUS)) gf_defrag_status_get(conf, output); - else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER) - gf_defrag_start_detach_tier(defrag); else if (cmd == GF_DEFRAG_CMD_DETACH_START) defrag->cmd = GF_DEFRAG_CMD_DETACH_START; else if (cmd == GF_DEFRAG_CMD_STOP || - cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER || cmd == GF_DEFRAG_CMD_DETACH_STOP) gf_defrag_stop(conf, GF_DEFRAG_STATUS_STOPPED, output); - else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER) - ret = gf_defrag_pause_tier(this, defrag); - else if (cmd == GF_DEFRAG_CMD_RESUME_TIER) - ret = gf_defrag_resume_tier(this, defrag); } unlock: UNLOCK(&defrag->lock); @@ -10828,6 +11019,7 @@ dht_notify(xlator_t *this, int event, void *data, ...) } if (!had_heard_from_all && have_heard_from_all) { + static int run_defrag = 0; /* This is the first event which completes aggregation of events from all subvolumes. If at least one subvol had come up, propagate CHILD_UP, but only this time @@ -10854,15 +11046,13 @@ dht_notify(xlator_t *this, int event, void *data, ...) * thread has already started. */ if (conf->defrag && !run_defrag) { - if (methods->migration_needed(this)) { - run_defrag = 1; - ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start, - this, "dhtdg"); - if (ret) { - GF_FREE(conf->defrag); - conf->defrag = NULL; - kill(getpid(), SIGTERM); - } + run_defrag = 1; + ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start, + this, "dhtdg"); + if (ret) { + GF_FREE(conf->defrag); + conf->defrag = NULL; + kill(getpid(), SIGTERM); } } } @@ -10943,8 +11133,7 @@ dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc, for (i = 0; i < layout->cnt; i++) { ret = snprintf(string, sizeof(string), "[Subvol_name: %s, Err: %d , Start: " - "%" PRIu32 " , Stop: %" PRIu32 " , Hash: %" PRIu32 - " ], ", + "0x%x, Stop: 0x%x, Hash: 0x%x], ", layout->list[i].xlator->name, layout->list[i].err, layout->list[i].start, layout->list[i].stop, layout->list[i].commit_hash); @@ -10973,8 +11162,7 @@ dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc, for (i = 0; i < layout->cnt; i++) { ret = snprintf(output_string + off, len - off, "[Subvol_name: %s, Err: %d , Start: " - "%" PRIu32 " , Stop: %" PRIu32 " , Hash: %" PRIu32 - " ], ", + "0x%x, Stop: 0x%x, Hash: 0x%x], ", layout->list[i].xlator->name, layout->list[i].err, layout->list[i].start, layout->list[i].stop, layout->list[i].commit_hash); @@ -11009,28 +11197,6 @@ out: return ret; } -int32_t -dht_migration_needed(xlator_t *this) -{ - gf_defrag_info_t *defrag = NULL; - dht_conf_t *conf = NULL; - int ret = 0; - - conf = this->private; - - GF_VALIDATE_OR_GOTO("dht", conf, out); - GF_VALIDATE_OR_GOTO("dht", conf->defrag, out); - - defrag = conf->defrag; - - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER)) - ret = 1; - -out: - return ret; -} - /* This function should not be called more then once during a FOP handling path. It is valid only for for ops on files @@ -11065,72 +11231,13 @@ dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf, return 0; } -gf_boolean_t -dht_is_tier_xlator(xlator_t *this) -{ - if (strcmp(this->type, "cluster/tier") == 0) - return _gf_true; - return _gf_false; -} - int32_t dht_release(xlator_t *this, fd_t *fd) { return dht_fd_ctx_destroy(this, fd); } -int -dht_remove_stale_linkto(void *data) -{ - call_frame_t *frame = NULL; - dht_local_t *local = NULL; - xlator_t *this = NULL; - dict_t *xdata_in = NULL; - int ret = 0; - - GF_VALIDATE_OR_GOTO("dht", data, out); - - frame = data; - local = frame->local; - this = frame->this; - GF_VALIDATE_OR_GOTO("dht", this, out); - GF_VALIDATE_OR_GOTO("dht", local, out); - GF_VALIDATE_OR_GOTO("dht", local->link_subvol, out); - - xdata_in = dict_new(); - if (!xdata_in) - goto out; - - ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(xdata_in); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, -ret, 0, - "Failed to set keys for stale linkto" - "deletion on path %s", - local->loc.path); - goto out; - } - - ret = syncop_unlink(local->link_subvol, &local->loc, xdata_in, NULL); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, -ret, 0, - "Removal of linkto failed" - " on path %s at subvol %s", - local->loc.path, local->link_subvol->name); - } -out: - if (xdata_in) - dict_unref(xdata_in); - return ret; -} - -int -dht_remove_stale_linkto_cbk(int ret, call_frame_t *sync_frame, void *data) -{ - DHT_STACK_DESTROY(sync_frame); - return 0; -} - -int +static int dht_pt_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) @@ -11216,6 +11323,8 @@ dht_pt_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, conf = this->private; dict_del(xattr, conf->xattr_name); + dict_del(xattr, conf->mds_xattr_key); + dict_del(xattr, conf->commithash_xattr_name); if (frame->root->pid >= 0) { GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr); @@ -11261,3 +11370,22 @@ dht_pt_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata); return 0; } + +/* The job of this function is to check if all the xlators have updated + * error in the layout. */ +int +dht_dir_layout_error_check(xlator_t *this, inode_t *inode) +{ + dht_layout_t *layout = NULL; + int i = 0; + + layout = dht_layout_get(this, inode); + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err == 0) { + return 0; + } + } + + /* Returning the first xlator error as all xlators have errors */ + return layout->list[0].err; +} |
