diff options
Diffstat (limited to 'xlators/cluster/ec/src/ec-common.c')
| -rw-r--r-- | xlators/cluster/ec/src/ec-common.c | 159 |
1 files changed, 122 insertions, 37 deletions
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index e85aa8bf43f..b955efd8c2d 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -101,6 +101,7 @@ ec_fix_open(ec_fop_data_t *fop, uintptr_t mask) { uintptr_t need_open = 0; int ret = 0; + int32_t flags = 0; loc_t loc = { 0, }; @@ -121,6 +122,7 @@ ec_fix_open(ec_fop_data_t *fop, uintptr_t mask) goto out; } + flags = fop->fd->flags & (~(O_TRUNC | O_APPEND | O_CREAT | O_EXCL)); if (IA_IFDIR == fop->fd->inode->ia_type) { ec_opendir(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, @@ -128,7 +130,7 @@ ec_fix_open(ec_fop_data_t *fop, uintptr_t mask) } else { ec_open(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, &loc, - fop->fd->flags & (~O_TRUNC), fop->fd, NULL); + flags, fop->fd, NULL); } out: @@ -228,7 +230,7 @@ ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx) int32_t ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good, - uintptr_t bad, dict_t *xdata) + uintptr_t bad, uint32_t pending, dict_t *xdata) { if (op_ret < 0) { gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL, @@ -314,17 +316,19 @@ ec_check_status(ec_fop_data_t *fop) } } - gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS, - "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, " - "remaining=%s, good=%s, bad=%s, %s)", - gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes, - ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), - ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), - ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes), - ec_bin(str4, sizeof(str4), fop->good, ec->nodes), - ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good), - ec->nodes), - ec_msg_str(fop)); + gf_msg( + fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS, + "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, " + "remaining=%s, good=%s, bad=%s," + "(Least significant bit represents first client/brick of subvol), %s)", + gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes, + ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), + ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), + ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes), + ec_bin(str4, sizeof(str4), fop->good, ec->nodes), + ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good), + ec->nodes), + ec_msg_str(fop)); if (fop->use_fd) { if (fop->fd != NULL) { ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, @@ -612,10 +616,10 @@ ec_msg_str(ec_fop_data_t *fop) loc_t *loc2 = NULL; char gfid1[64] = {0}; char gfid2[64] = {0}; + ec_fop_data_t *parent = fop->parent; if (fop->errstr) return fop->errstr; - if (!fop->use_fd) { loc1 = &fop->loc[0]; loc2 = &fop->loc[1]; @@ -623,23 +627,45 @@ ec_msg_str(ec_fop_data_t *fop) if (fop->id == GF_FOP_RENAME) { gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' and '%s' with gfids " - "%s and %s respectively", + "%s and %s respectively. Parent FOP: %s", ec_fop_name(fop->id), loc1->path, loc2->path, uuid_utoa_r(loc1->gfid, gfid1), - uuid_utoa_r(loc2->gfid, gfid2)); + uuid_utoa_r(loc2->gfid, gfid2), + parent ? ec_fop_name(parent->id) : "No Parent"); } else { - gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' with gfid %s", - ec_fop_name(fop->id), loc1->path, - uuid_utoa_r(loc1->gfid, gfid1)); + gf_asprintf( + &fop->errstr, + "FOP : '%s' failed on '%s' with gfid %s. Parent FOP: %s", + ec_fop_name(fop->id), loc1->path, + uuid_utoa_r(loc1->gfid, gfid1), + parent ? ec_fop_name(parent->id) : "No Parent"); } } else { - gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s", - ec_fop_name(fop->id), - uuid_utoa_r(fop->fd->inode->gfid, gfid1)); + gf_asprintf( + &fop->errstr, "FOP : '%s' failed on gfid %s. Parent FOP: %s", + ec_fop_name(fop->id), uuid_utoa_r(fop->fd->inode->gfid, gfid1), + parent ? ec_fop_name(parent->id) : "No Parent"); } return fop->errstr; } +static void +ec_log_insufficient_vol(ec_fop_data_t *fop, int32_t have, uint32_t need, + int32_t loglevel) +{ + ec_t *ec = fop->xl->private; + char str1[32], str2[32], str3[32]; + + gf_msg(ec->xl->name, loglevel, 0, EC_MSG_CHILDS_INSUFFICIENT, + "Insufficient available children for this request: " + "Have : %d, Need : %u : Child UP : %s " + "Mask: %s, Healing : %s : %s ", + have, need, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), + ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), + ec_bin(str3, sizeof(str3), fop->healing, ec->nodes), + ec_msg_str(fop)); +} + static int32_t ec_child_select(ec_fop_data_t *fop) { @@ -654,6 +680,9 @@ ec_child_select(ec_fop_data_t *fop) * unlock should go on all subvols where lock is performed*/ if (fop->parent && !ec_internal_op(fop)) { fop->mask &= (fop->parent->mask & ~fop->parent->healing); + if (ec_is_data_fop(fop->id)) { + fop->healing |= fop->parent->healing; + } } if ((fop->mask & ~ec->xl_up) != 0) { @@ -694,14 +723,19 @@ ec_child_select(ec_fop_data_t *fop) ec_trace("SELECT", fop, ""); if ((num < fop->minimum) && (num < ec->fragments)) { - gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT, - "Insufficient available children " - "for this request (have %d, need " - "%d). %s", - num, fop->minimum, ec_msg_str(fop)); + ec_log_insufficient_vol(fop, num, fop->minimum, GF_LOG_ERROR); return 0; } + if (!fop->parent && fop->lock_count && + (fop->locks[0].update[EC_DATA_TXN] || + fop->locks[0].update[EC_METADATA_TXN])) { + if (ec->quorum_count && (num < ec->quorum_count)) { + ec_log_insufficient_vol(fop, num, ec->quorum_count, GF_LOG_ERROR); + return 0; + } + } + return 1; } @@ -1405,27 +1439,28 @@ ec_get_size_version(ec_lock_link_t *link) !ec_is_data_fop(fop->id)) link->optimistic_changelog = _gf_true; + memset(&loc, 0, sizeof(loc)); + + LOCK(&lock->loc.inode->lock); + set_dirty = ec_set_dirty_flag(link, ctx, dirty); /* If ec metadata has already been retrieved, do not try again. */ - if (ctx->have_info && (!set_dirty)) { + if (ctx->have_info) { if (ec_is_data_fop(fop->id)) { fop->healing |= lock->healing; } - return; + if (!set_dirty) + goto unlock; } /* Determine if there's something we need to retrieve for the current * operation. */ if (!set_dirty && !lock->query && (lock->loc.inode->ia_type != IA_IFREG) && (lock->loc.inode->ia_type != IA_INVAL)) { - return; + goto unlock; } - memset(&loc, 0, sizeof(loc)); - - LOCK(&lock->loc.inode->lock); - changed_flags = ec_set_xattrop_flags_and_params(lock, link, dirty); if (link->waiting_flags) { /* This fop needs to wait until all its flags are cleared which @@ -1436,6 +1471,7 @@ ec_get_size_version(ec_lock_link_t *link) GF_ASSERT(!changed_flags); } +unlock: UNLOCK(&lock->loc.inode->lock); if (!changed_flags) @@ -1847,6 +1883,10 @@ ec_lock_acquired(ec_lock_link_t *link) LOCK(&lock->loc.inode->lock); lock->acquired = _gf_true; + if (lock->contention) { + lock->release = _gf_true; + lock->contention = _gf_false; + } ec_lock_update_fd(lock, fop); ec_lock_wake_shared(lock, &list); @@ -1872,15 +1912,20 @@ ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ec_lock_link_t *link = NULL; ec_lock_t *lock = NULL; + link = fop->data; + lock = link->lock; if (op_ret >= 0) { - link = fop->data; - lock = link->lock; lock->mask = lock->good_mask = fop->good; lock->healing = 0; ec_lock_acquired(link); ec_lock(fop->parent); } else { + LOCK(&lock->loc.inode->lock); + { + lock->contention = _gf_false; + } + UNLOCK(&lock->loc.inode->lock); gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_PREOP_LOCK_FAILED, "Failed to complete preop lock"); } @@ -2211,7 +2256,7 @@ ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, if (op_ret < 0) { gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_UNLOCK_FAILED, - "entry/inode unlocking failed (%s)", ec_fop_name(link->fop->id)); + "entry/inode unlocking failed :(%s)", ec_msg_str(link->fop)); } else { ec_trace("UNLOCKED", link->fop, "lock=%p", link->lock); } @@ -2248,6 +2293,23 @@ ec_unlock_lock(ec_lock_link_t *link) } } +void +ec_inode_bad_inc(inode_t *inode, xlator_t *xl) +{ + ec_inode_t *ctx = NULL; + + LOCK(&inode->lock); + { + ctx = __ec_inode_get(inode, xl); + if (ctx == NULL) { + goto unlock; + } + ctx->bad_version++; + } +unlock: + UNLOCK(&inode->lock); +} + int32_t ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xattr, @@ -2263,6 +2325,12 @@ ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this, ctx = lock->ctx; if (op_ret < 0) { + if (link->lock->fd == NULL) { + ec_inode_bad_inc(link->lock->loc.inode, this); + } else { + ec_inode_bad_inc(link->lock->fd->inode, this); + } + gf_msg(fop->xl->name, fop_log_level(fop->id, op_errno), op_errno, EC_MSG_SIZE_VERS_UPDATE_FAIL, "Failed to update version and size. %s", ec_msg_str(fop)); @@ -2504,6 +2572,13 @@ ec_lock_release(ec_t *ec, inode_t *inode) gf_msg_debug(ec->xl->name, 0, "Releasing inode %p due to lock contention", inode); + if (!lock->acquired) { + /* This happens if some bricks already got the lock while inodelk is in + * progress. Set release to true after lock is acquired*/ + lock->contention = _gf_true; + goto done; + } + /* The lock is not marked to be released, so the frozen list should be * empty. */ GF_ASSERT(list_empty(&lock->frozen)); @@ -2955,3 +3030,13 @@ ec_manager(ec_fop_data_t *fop, int32_t error) __ec_manager(fop, error); } + +gf_boolean_t +__ec_is_last_fop(ec_t *ec) +{ + if ((list_empty(&ec->pending_fops)) && + (GF_ATOMIC_GET(ec->async_fop_count) == 0)) { + return _gf_true; + } + return _gf_false; +} |
