-rw-r--r--xlators/cluster/afr/src/afr-transaction.c169
-rw-r--r--xlators/cluster/afr/src/afr.c125
-rw-r--r--xlators/cluster/afr/src/afr.h355
-rw-r--r--xlators/cluster/dht/src/dht-common.c1930
-rw-r--r--xlators/cluster/dht/src/dht-common.h410
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c16
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c40
-rw-r--r--xlators/cluster/dht/src/dht-helper.c315
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c137
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c72
-rw-r--r--xlators/cluster/dht/src/dht-layout.c95
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c53
-rw-r--r--xlators/cluster/dht/src/dht-lock.c190
-rw-r--r--xlators/cluster/dht/src/dht-lock.h1
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h3
-rw-r--r--xlators/cluster/dht/src/dht-messages.h320
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c1131
-rw-r--r--xlators/cluster/dht/src/dht-rename.c6
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c592
-rw-r--r--xlators/cluster/dht/src/dht-shared.c147
-rw-r--r--xlators/cluster/dht/src/dht.c1
-rw-r--r--xlators/cluster/dht/src/nufa.c1
-rw-r--r--xlators/cluster/dht/src/switch.c9
-rw-r--r--xlators/cluster/ec/src/ec-combine.c40
-rw-r--r--xlators/cluster/ec/src/ec-common.c159
-rw-r--r--xlators/cluster/ec/src/ec-common.h31
-rw-r--r--xlators/cluster/ec/src/ec-data.c5
-rw-r--r--xlators/cluster/ec/src/ec-dir-read.c27
-rw-r--r--xlators/cluster/ec/src/ec-dir-write.c60
-rw-r--r--xlators/cluster/ec/src/ec-galois.c3
-rw-r--r--xlators/cluster/ec/src/ec-generic.c54
-rw-r--r--xlators/cluster/ec/src/ec-heal.c194
-rw-r--r--xlators/cluster/ec/src/ec-heald.c157
-rw-r--r--xlators/cluster/ec/src/ec-heald.h9
-rw-r--r--xlators/cluster/ec/src/ec-helpers.c9
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c49
-rw-r--r--xlators/cluster/ec/src/ec-inode-write.c117
-rw-r--r--xlators/cluster/ec/src/ec-locks.c73
-rw-r--r--xlators/cluster/ec/src/ec-messages.h3
-rw-r--r--xlators/cluster/ec/src/ec-method.h2
-rw-r--r--xlators/cluster/ec/src/ec-types.h22
-rw-r--r--xlators/cluster/ec/src/ec.c213
-rw-r--r--xlators/cluster/ec/src/ec.h1
-rw-r--r--xlators/debug/delay-gen/src/delay-gen.c2
-rw-r--r--xlators/debug/error-gen/src/error-gen.c47
-rw-r--r--xlators/debug/io-stats/src/io-stats.c256
-rw-r--r--xlators/debug/trace/src/trace.c20
-rw-r--r--xlators/features/Makefile.am6
-rw-r--r--xlators/features/barrier/src/barrier.c2
-rw-r--r--xlators/features/barrier/src/barrier.h3
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h51
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c12
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h20
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.c80
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.c302
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.h20
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c97
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h1
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h75
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.c358
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.h22
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py4
-rw-r--r--xlators/features/changelog/lib/src/changelog-lib-messages.h30
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-api.c4
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c6
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h3
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-journal-handler.c25
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-reborp.c32
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c5
-rw-r--r--xlators/features/changelog/lib/src/gf-history-changelog.c68
-rw-r--r--xlators/features/changelog/src/changelog-barrier.c15
-rw-r--r--xlators/features/changelog/src/changelog-ev-handle.c9
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c306
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h37
-rw-r--r--xlators/features/changelog/src/changelog-messages.h122
-rw-r--r--xlators/features/changelog/src/changelog-rpc-common.c51
-rw-r--r--xlators/features/changelog/src/changelog-rpc.c19
-rw-r--r--xlators/features/changelog/src/changelog.c347
-rw-r--r--xlators/features/cloudsync/src/Makefile.am4
-rw-r--r--xlators/features/cloudsync/src/cloudsync-common.c16
-rw-r--r--xlators/features/cloudsync/src/cloudsync-common.h35
-rwxr-xr-xxlators/features/cloudsync/src/cloudsync-fops-c.py38
-rw-r--r--xlators/features/cloudsync/src/cloudsync-mem-types.h1
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am6
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c2
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am (renamed from xlators/performance/decompounder/Makefile.am)2
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am12
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h203
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h30
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym1
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h19
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c842
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h84
-rw-r--r--xlators/features/cloudsync/src/cloudsync.c567
-rw-r--r--xlators/features/cloudsync/src/cloudsync.h22
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c6
-rw-r--r--xlators/features/index/src/index.c5
-rw-r--r--xlators/features/leases/src/leases-internal.c72
-rw-r--r--xlators/features/leases/src/leases.c18
-rw-r--r--xlators/features/leases/src/leases.h46
-rw-r--r--xlators/features/locks/src/clear.c4
-rw-r--r--xlators/features/locks/src/common.c368
-rw-r--r--xlators/features/locks/src/common.h51
-rw-r--r--xlators/features/locks/src/entrylk.c47
-rw-r--r--xlators/features/locks/src/inodelk.c199
-rw-r--r--xlators/features/locks/src/locks.h50
-rw-r--r--xlators/features/locks/src/posix.c432
-rw-r--r--xlators/features/locks/src/reservelk.c2
-rw-r--r--xlators/features/marker/src/marker-common.c7
-rw-r--r--xlators/features/marker/src/marker-common.h2
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c93
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h12
-rw-r--r--xlators/features/marker/src/marker-quota.c78
-rw-r--r--xlators/features/marker/src/marker-quota.h9
-rw-r--r--xlators/features/marker/src/marker.c25
-rw-r--r--xlators/features/metadisp/Makefile.am3
-rw-r--r--xlators/features/metadisp/src/Makefile.am38
-rw-r--r--xlators/features/metadisp/src/backend.c45
-rw-r--r--xlators/features/metadisp/src/fops-tmpl.c10
-rw-r--r--xlators/features/metadisp/src/gen-fops.py160
-rw-r--r--xlators/features/metadisp/src/metadisp-create.c101
-rw-r--r--xlators/features/metadisp/src/metadisp-fops.h51
-rw-r--r--xlators/features/metadisp/src/metadisp-fsync.c54
-rw-r--r--xlators/features/metadisp/src/metadisp-lookup.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-open.c70
-rw-r--r--xlators/features/metadisp/src/metadisp-readdir.c65
-rw-r--r--xlators/features/metadisp/src/metadisp-setattr.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-stat.c124
-rw-r--r--xlators/features/metadisp/src/metadisp-unlink.c160
-rw-r--r--xlators/features/metadisp/src/metadisp.c46
-rw-r--r--xlators/features/metadisp/src/metadisp.h45
-rw-r--r--xlators/features/namespace/src/namespace.c1
-rw-r--r--xlators/features/quiesce/src/quiesce.c43
-rw-r--r--xlators/features/quota/src/quota-enforcer-client.c20
-rw-r--r--xlators/features/quota/src/quota.c149
-rw-r--r--xlators/features/quota/src/quota.h11
-rw-r--r--xlators/features/quota/src/quotad-aggregator.c71
-rw-r--r--xlators/features/quota/src/quotad-aggregator.h4
-rw-r--r--xlators/features/quota/src/quotad-helpers.c6
-rw-r--r--xlators/features/quota/src/quotad.c12
-rw-r--r--xlators/features/read-only/src/read-only.c1
-rw-r--r--xlators/features/read-only/src/read-only.h13
-rw-r--r--xlators/features/read-only/src/worm-helper.c15
-rw-r--r--xlators/features/read-only/src/worm.c108
-rw-r--r--xlators/features/sdfs/src/sdfs.c10
-rw-r--r--xlators/features/selinux/src/selinux.c8
-rw-r--r--xlators/features/shard/src/shard.c607
-rw-r--r--xlators/features/shard/src/shard.h7
-rw-r--r--xlators/features/snapview-client/src/snapview-client-messages.h35
-rw-r--r--xlators/features/snapview-client/src/snapview-client.c384
-rw-r--r--xlators/features/snapview-server/src/snapview-server-mgmt.c40
-rw-r--r--xlators/features/snapview-server/src/snapview-server.c12
-rw-r--r--xlators/features/snapview-server/src/snapview-server.h11
-rw-r--r--xlators/features/trash/src/trash.c7
-rw-r--r--xlators/features/upcall/src/upcall-cache-invalidation.h6
-rw-r--r--xlators/features/upcall/src/upcall-internal.c172
-rw-r--r--xlators/features/upcall/src/upcall.c108
-rw-r--r--xlators/features/upcall/src/upcall.h28
-rwxr-xr-xxlators/features/utime/src/utime-gen-fops-c.py10
-rw-r--r--xlators/features/utime/src/utime-helpers.h1
-rw-r--r--xlators/features/utime/src/utime.c31
-rw-r--r--xlators/lib/src/libxlator.c8
-rw-r--r--xlators/lib/src/libxlator.h8
-rw-r--r--xlators/meta/src/meta-helpers.c9
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am29
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitrot.c87
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c1111
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c69
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h8
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-errno.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-ganesha.c927
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c186
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c5
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c14
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c1071
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c344
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c108
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h4
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c112
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c20
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-messages.h156
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c247
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c592
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.h10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c45
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h6
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-nfs-svc.c3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-nfs-svc.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c1314
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-peer-utils.c312
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-peer-utils.h9
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c32
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h4
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c4
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c84
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quotad-svc.c4
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c278
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c28
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-reset-brick.c14
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c138
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-scrub-svc.c4
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-server-quorum.c14
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c153
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc.c662
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc.h17
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c220
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc.c23
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c372
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c389
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-statedump.c8
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c1555
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h36
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-helper.c828
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-helper.h43
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c276
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h46
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c136
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tier.c1378
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c7
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.h37
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tierd-svc.c503
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tierd-svc.h41
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c3270
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h94
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c1514
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h34
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c906
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c302
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c325
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h462
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c557
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h58
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c111
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in55
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in19
-rw-r--r--xlators/nfs/server/src/acl3.c32
-rw-r--r--xlators/nfs/server/src/acl3.h2
-rw-r--r--xlators/nfs/server/src/auth-cache.c13
-rw-r--r--xlators/nfs/server/src/exports.c9
-rw-r--r--xlators/nfs/server/src/mount3.c72
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c2
-rw-r--r--xlators/nfs/server/src/nfs-common.c5
-rw-r--r--xlators/nfs/server/src/nfs-fops.c2
-rw-r--r--xlators/nfs/server/src/nfs.c30
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c6
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c4
-rw-r--r--xlators/nfs/server/src/nfs3.c54
-rw-r--r--xlators/nfs/server/src/nlm4.c84
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c5
-rw-r--r--xlators/performance/Makefile.am2
-rw-r--r--xlators/performance/decompounder/src/Makefile.am19
-rw-r--r--xlators/performance/decompounder/src/decompounder-mem-types.h17
-rw-r--r--xlators/performance/decompounder/src/decompounder-messages.h28
-rw-r--r--xlators/performance/decompounder/src/decompounder.c845
-rw-r--r--xlators/performance/decompounder/src/decompounder.h78
-rw-r--r--xlators/performance/io-cache/src/io-cache-messages.h39
-rw-r--r--xlators/performance/io-cache/src/io-cache.c191
-rw-r--r--xlators/performance/io-cache/src/io-cache.h31
-rw-r--r--xlators/performance/io-cache/src/ioc-inode.c14
-rw-r--r--xlators/performance/io-cache/src/page.c48
-rw-r--r--xlators/performance/io-threads/src/io-threads-messages.h14
-rw-r--r--xlators/performance/io-threads/src/io-threads.c85
-rw-r--r--xlators/performance/md-cache/src/md-cache.c491
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-helper.c55
-rw-r--r--xlators/performance/nl-cache/src/nl-cache.c6
-rw-r--r--xlators/performance/nl-cache/src/nl-cache.h2
-rw-r--r--xlators/performance/open-behind/src/open-behind-messages.h6
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1344
-rw-r--r--xlators/performance/quick-read/src/quick-read.c50
-rw-r--r--xlators/performance/quick-read/src/quick-read.h2
-rw-r--r--xlators/performance/read-ahead/src/page.c10
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c9
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c4
-rw-r--r--xlators/performance/write-behind/src/write-behind.c21
-rw-r--r--xlators/protocol/client/src/client-callback.c89
-rw-r--r--xlators/protocol/client/src/client-common.c30
-rw-r--r--xlators/protocol/client/src/client-handshake.c725
-rw-r--r--xlators/protocol/client/src/client-helpers.c2360
-rw-r--r--xlators/protocol/client/src/client-lk.c31
-rw-r--r--xlators/protocol/client/src/client-messages.h123
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c918
-rw-r--r--xlators/protocol/client/src/client-rpc-fops_v2.c933
-rw-r--r--xlators/protocol/client/src/client.c1285
-rw-r--r--xlators/protocol/client/src/client.h174
-rw-r--r--xlators/protocol/server/src/Makefile.am4
-rw-r--r--xlators/protocol/server/src/server-common.c32
-rw-r--r--xlators/protocol/server/src/server-handshake.c125
-rw-r--r--xlators/protocol/server/src/server-helpers.c4300
-rw-r--r--xlators/protocol/server/src/server-helpers.h33
-rw-r--r--xlators/protocol/server/src/server-messages.h179
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c394
-rw-r--r--xlators/protocol/server/src/server-rpc-fops_v2.c1592
-rw-r--r--xlators/protocol/server/src/server.c237
-rw-r--r--xlators/protocol/server/src/server.h62
-rw-r--r--xlators/storage/posix/src/posix-aio.c2
-rw-r--r--xlators/storage/posix/src/posix-aio.h3
-rw-r--r--xlators/storage/posix/src/posix-common.c250
-rw-r--r--xlators/storage/posix/src/posix-entry-ops.c399
-rw-r--r--xlators/storage/posix/src/posix-gfid-path.c94
-rw-r--r--xlators/storage/posix/src/posix-gfid-path.h11
-rw-r--r--xlators/storage/posix/src/posix-handle.c205
-rw-r--r--xlators/storage/posix/src/posix-handle.h33
-rw-r--r--xlators/storage/posix/src/posix-helpers.c655
-rw-r--r--xlators/storage/posix/src/posix-inode-fd-ops.c636
-rw-r--r--xlators/storage/posix/src/posix-inode-handle.h14
-rw-r--r--xlators/storage/posix/src/posix-messages.h4
-rw-r--r--xlators/storage/posix/src/posix-metadata.c153
-rw-r--r--xlators/storage/posix/src/posix-metadata.h6
-rw-r--r--xlators/storage/posix/src/posix.h143
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c203
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h1
339 files changed, 30654 insertions, 30966 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 9a6475b6ec8..032ab5c8001 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -18,9 +18,7 @@
#include <glusterfs/glusterfs.h>
#include "afr.h"
#include <glusterfs/dict.h>
-#include <glusterfs/xlator.h>
#include <glusterfs/hashfn.h>
-#include <glusterfs/logging.h>
#include <glusterfs/list.h>
#include <glusterfs/call-stub.h>
#include <glusterfs/defaults.h>
@@ -47,6 +45,56 @@ afr_quorum_errno(afr_private_t *priv)
return ENOTCONN;
}
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid)
+{
+ if (!__is_root_gfid(pargfid)) {
+ return _gf_false;
+ }
+
+ if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) {
+ /*For backward compatibility /.landfill is private*/
+ return _gf_true;
+ }
+
+ if (pid == GF_CLIENT_PID_GSYNCD) {
+ /*geo-rep needs to create/sync private directory on slave because
+ * it appears in changelog*/
+ return _gf_false;
+ }
+
+ if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) {
+ if (strcmp(name, priv->anon_inode_name) == 0) {
+ /* anonymous-inode dir is private*/
+ return _gf_true;
+ }
+ } else {
+ if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) ==
+ 0) {
+ /* anonymous-inode dir prefix is private for geo-rep to work*/
+ return _gf_true;
+ }
+ }
+
+ return _gf_false;
+}
+
+void
+afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies)
+{
+ int i = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].op_ret == 0) {
+ replies[i] = 1;
+ } else {
+ replies[i] = 0;
+ }
+ }
+}
+
int
afr_fav_child_reset_sink_xattrs(void *opaque);
@@ -56,6 +104,581 @@ afr_fav_child_reset_sink_xattrs_cbk(int ret, call_frame_t *frame, void *opaque);
static void
afr_discover_done(call_frame_t *frame, xlator_t *this);
+int
+afr_dom_lock_acquire_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int i = (long)cookie;
+
+ local->cont.lk.dom_lock_op_ret[i] = op_ret;
+ local->cont.lk.dom_lock_op_errno[i] = op_errno;
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to acquire %s on %s",
+ uuid_utoa(local->fd->inode->gfid), AFR_LK_HEAL_DOM,
+ priv->children[i]->name);
+ } else {
+ local->cont.lk.dom_locked_nodes[i] = 1;
+ }
+
+ syncbarrier_wake(&local->barrier);
+
+ return 0;
+}
+
+int
+afr_dom_lock_acquire(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+ int i = 0;
+
+ priv = frame->this->private;
+ local = frame->local;
+ local->cont.lk.dom_locked_nodes = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.locked_nodes),
+ gf_afr_mt_char);
+ if (!local->cont.lk.dom_locked_nodes) {
+ return -ENOMEM;
+ }
+ local->cont.lk.dom_lock_op_ret = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.dom_lock_op_ret),
+ gf_afr_mt_int32_t);
+ if (!local->cont.lk.dom_lock_op_ret) {
+ return -ENOMEM; /* CALLOC'd members are freed in afr_local_cleanup. */
+ }
+ local->cont.lk.dom_lock_op_errno = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.dom_lock_op_errno),
+ gf_afr_mt_int32_t);
+ if (!local->cont.lk.dom_lock_op_errno) {
+ return -ENOMEM; /* CALLOC'd members are freed in afr_local_cleanup. */
+ }
+ flock.l_type = F_WRLCK;
+
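+    /* Try non-blocking (F_SETLK) locks on all children first; if quorum is
+     * not met or any brick returns EAGAIN, the locks are released and
+     * reacquired as blocking (F_SETLKW) locks below. */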
+ AFR_ONALL(frame, afr_dom_lock_acquire_cbk, finodelk, AFR_LK_HEAL_DOM,
+ local->fd, F_SETLK, &flock, NULL);
+
+ if (!afr_has_quorum(local->cont.lk.dom_locked_nodes, frame->this, NULL))
+ goto blocking_lock;
+
+ /*If any of the bricks returned EAGAIN, we still need blocking locks.*/
+ if (AFR_COUNT(local->cont.lk.dom_locked_nodes, priv->child_count) !=
+ priv->child_count) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lk.dom_lock_op_ret[i] == -1 &&
+ local->cont.lk.dom_lock_op_errno[i] == EAGAIN)
+ goto blocking_lock;
+ }
+ }
+
+ return 0;
+
+blocking_lock:
+ afr_dom_lock_release(frame);
+ AFR_ONALL(frame, afr_dom_lock_acquire_cbk, finodelk, AFR_LK_HEAL_DOM,
+ local->fd, F_SETLKW, &flock, NULL);
+ if (!afr_has_quorum(local->cont.lk.dom_locked_nodes, frame->this, NULL)) {
+ afr_dom_lock_release(frame);
+ return -afr_quorum_errno(priv);
+ }
+
+ return 0;
+}
+
+int
+afr_dom_lock_release_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int i = (long)cookie;
+
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to release %s on %s", local->loc.path,
+ AFR_LK_HEAL_DOM, priv->children[i]->name);
+ }
+ local->cont.lk.dom_locked_nodes[i] = 0;
+
+ syncbarrier_wake(&local->barrier);
+
+ return 0;
+}
+
+void
+afr_dom_lock_release(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+
+ local = frame->local;
+ priv = frame->this->private;
+ locked_on = local->cont.lk.dom_locked_nodes;
+ if (AFR_COUNT(locked_on, priv->child_count) == 0)
+ return;
+ flock.l_type = F_UNLCK;
+
+ AFR_ONLIST(locked_on, frame, afr_dom_lock_release_cbk, finodelk,
+ AFR_LK_HEAL_DOM, local->fd, F_SETLK, &flock, NULL);
+
+ return;
+}
+
+static void
+afr_lk_heal_info_cleanup(afr_lk_heal_info_t *info)
+{
+ if (!info)
+ return;
+ if (info->xdata_req)
+ dict_unref(info->xdata_req);
+ if (info->fd)
+ fd_unref(info->fd);
+ GF_FREE(info->locked_nodes);
+ GF_FREE(info->child_up_event_gen);
+ GF_FREE(info->child_down_event_gen);
+ GF_FREE(info);
+}
+
+static int
+afr_add_lock_to_saved_locks(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+ afr_lk_heal_info_t *info = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -ENOMEM;
+
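+    /* Snapshot everything needed to replay this lock (fd, lk cmd, flock,
+     * lk-owner, xdata and the per-child locked state) so it can be healed
+     * when a brick comes back up. */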
+ info = GF_CALLOC(sizeof(*info), 1, gf_afr_mt_lk_heal_info_t);
+ if (!info) {
+ goto cleanup;
+ }
+ INIT_LIST_HEAD(&info->pos);
+ info->fd = fd_ref(local->fd);
+ info->cmd = local->cont.lk.cmd;
+ info->pid = frame->root->pid;
+ info->flock = local->cont.lk.user_flock;
+ info->xdata_req = dict_copy_with_ref(local->xdata_req, NULL);
+ if (!info->xdata_req) {
+ goto cleanup;
+ }
+ info->lk_owner = frame->root->lk_owner;
+ info->locked_nodes = GF_MALLOC(
+ sizeof(*info->locked_nodes) * priv->child_count, gf_afr_mt_char);
+ if (!info->locked_nodes) {
+ goto cleanup;
+ }
+ memcpy(info->locked_nodes, local->cont.lk.locked_nodes,
+ sizeof(*info->locked_nodes) * priv->child_count);
+ info->child_up_event_gen = GF_CALLOC(sizeof(*info->child_up_event_gen),
+ priv->child_count, gf_afr_mt_int32_t);
+ if (!info->child_up_event_gen) {
+ goto cleanup;
+ }
+ info->child_down_event_gen = GF_CALLOC(sizeof(*info->child_down_event_gen),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!info->child_down_event_gen) {
+ goto cleanup;
+ }
+
+ LOCK(&local->fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get(local->fd, this);
+ if (fd_ctx)
+ fd_ctx->lk_heal_info = info;
+ }
+ UNLOCK(&local->fd->lock);
+ if (!fd_ctx) {
+ goto cleanup;
+ }
+
+ LOCK(&priv->lock);
+ {
+ list_add_tail(&info->pos, &priv->saved_locks);
+ }
+ UNLOCK(&priv->lock);
+
+ return 0;
+cleanup:
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to add lock to healq",
+ uuid_utoa(local->fd->inode->gfid));
+ if (info) {
+ afr_lk_heal_info_cleanup(info);
+ if (fd_ctx) {
+ LOCK(&local->fd->lock);
+ {
+ fd_ctx->lk_heal_info = NULL;
+ }
+ UNLOCK(&local->fd->lock);
+ }
+ }
+ return ret;
+}
+
+static int
+afr_remove_lock_from_saved_locks(afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ struct gf_flock flock = local->cont.lk.user_flock;
+ afr_lk_heal_info_t *info = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -EINVAL;
+
+ fd_ctx = afr_fd_ctx_get(local->fd, this);
+ if (!fd_ctx || !fd_ctx->lk_heal_info) {
+ goto out;
+ }
+
+ info = fd_ctx->lk_heal_info;
+ if ((info->flock.l_start != flock.l_start) ||
+ (info->flock.l_whence != flock.l_whence) ||
+ (info->flock.l_len != flock.l_len)) {
+ /*TODO: Compare lkowners too.*/
+ goto out;
+ }
+
+ LOCK(&priv->lock);
+ {
+ list_del(&fd_ctx->lk_heal_info->pos);
+ }
+ UNLOCK(&priv->lock);
+
+ afr_lk_heal_info_cleanup(info);
+ fd_ctx->lk_heal_info = NULL;
+ ret = 0;
+out:
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to remove lock from healq",
+ uuid_utoa(local->fd->inode->gfid));
+ return ret;
+}
+
+int
+afr_lock_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "Failed to heal lock on child %d for %s", i,
+ uuid_utoa(local->fd->inode->gfid));
+ }
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+int
+afr_getlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "Failed getlk for %s", uuid_utoa(local->fd->inode->gfid));
+ } else {
+ local->cont.lk.getlk_rsp[i] = *lock;
+ }
+
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+static gf_boolean_t
+afr_does_lk_owner_match(call_frame_t *frame, afr_private_t *priv,
+ afr_lk_heal_info_t *info)
+{
+ int i = 0;
+ afr_local_t *local = frame->local;
+ struct gf_flock flock = {
+ 0,
+ };
+ gf_boolean_t ret = _gf_true;
+ char *wind_on = alloca0(priv->child_count);
+ unsigned char *success_replies = alloca0(priv->child_count);
+ local->cont.lk.getlk_rsp = GF_CALLOC(sizeof(*local->cont.lk.getlk_rsp),
+ priv->child_count, gf_afr_mt_gf_lock);
+
+ flock = info->flock;
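+    /* Probe with F_GETLK on the children that still hold the lock; if any
+     * of them reports a conflicting lock with a different lk-owner, another
+     * client has taken the lock and healing must be skipped. */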
+ for (i = 0; i < priv->child_count; i++) {
+ if (info->locked_nodes[i])
+ wind_on[i] = 1;
+ }
+
+ AFR_ONLIST(wind_on, frame, afr_getlk_cbk, lk, info->fd, F_GETLK, &flock,
+ info->xdata_req);
+
+ afr_fill_success_replies(local, priv, success_replies);
+ if (AFR_COUNT(success_replies, priv->child_count) == 0) {
+ ret = _gf_false;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid || local->replies[i].op_ret != 0)
+ continue;
+ if (local->cont.lk.getlk_rsp[i].l_type == F_UNLCK)
+ continue;
+ /*TODO: Do we really need to compare lkowner if F_UNLCK is true?*/
+ if (!is_same_lkowner(&local->cont.lk.getlk_rsp[i].l_owner,
+ &info->lk_owner)) {
+ ret = _gf_false;
+ break;
+ }
+ }
+out:
+ afr_local_replies_wipe(local, priv);
+ GF_FREE(local->cont.lk.getlk_rsp);
+ local->cont.lk.getlk_rsp = NULL;
+ return ret;
+}
+
+static void
+afr_mark_fd_bad(fd_t *fd, xlator_t *this)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ if (!fd)
+ return;
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get(fd, this);
+ if (fd_ctx) {
+ fd_ctx->is_fd_bad = _gf_true;
+ fd_ctx->lk_heal_info = NULL;
+ }
+ }
+ UNLOCK(&fd->lock);
+}
+
+static void
+afr_add_lock_to_lkhealq(afr_private_t *priv, afr_lk_heal_info_t *info)
+{
+ LOCK(&priv->lock);
+ {
+ list_del(&info->pos);
+ list_add_tail(&info->pos, &priv->lk_healq);
+ }
+ UNLOCK(&priv->lock);
+}
+
+static void
+afr_lock_heal_do(call_frame_t *frame, afr_private_t *priv,
+ afr_lk_heal_info_t *info)
+{
+ int i = 0;
+ int op_errno = 0;
+ int32_t *current_event_gen = NULL;
+ afr_local_t *local = frame->local;
+ xlator_t *this = frame->this;
+ char *wind_on = alloca0(priv->child_count);
+ gf_boolean_t retry = _gf_true;
+
+ frame->root->pid = info->pid;
+ lk_owner_copy(&frame->root->lk_owner, &info->lk_owner);
+
+ op_errno = -afr_dom_lock_acquire(frame);
+ if ((op_errno != 0)) {
+ goto release;
+ }
+
+ if (!afr_does_lk_owner_match(frame, priv, info)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_LK_HEAL_DOM,
+               "Ignoring lock heal for %s since lk-owners mismatch. "
+ "Lock possibly pre-empted by another client.",
+ uuid_utoa(info->fd->inode->gfid));
+ goto release;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (info->locked_nodes[i])
+ continue;
+ wind_on[i] = 1;
+ }
+
+    current_event_gen = alloca(priv->child_count * sizeof *current_event_gen);
+ memcpy(current_event_gen, info->child_up_event_gen,
+ priv->child_count * sizeof *current_event_gen);
+ AFR_ONLIST(wind_on, frame, afr_lock_heal_cbk, lk, info->fd, info->cmd,
+ &info->flock, info->xdata_req);
+
+ LOCK(&priv->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_on[i])
+ continue;
+ if ((!local->replies[i].valid) || (local->replies[i].op_ret != 0)) {
+ continue;
+ }
+
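+            /* Count the heal as valid only if no child up/down event raced
+             * with it: the snapshotted up event-gen must be unchanged and
+             * newer than the last down event-gen. */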
+ if ((current_event_gen[i] == info->child_up_event_gen[i]) &&
+ (current_event_gen[i] > info->child_down_event_gen[i])) {
+ info->locked_nodes[i] = 1;
+ retry = _gf_false;
+ list_del_init(&info->pos);
+ list_add_tail(&info->pos, &priv->saved_locks);
+ } else {
+ /*We received subsequent child up/down events while heal was in
+ * progress; don't mark child as healed. Attempt again on the
+ * new child up*/
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_LK_HEAL_DOM,
+ "Event gen mismatch: skipped healing lock on child %d "
+ "for %s.",
+ i, uuid_utoa(info->fd->inode->gfid));
+ }
+ }
+ }
+ UNLOCK(&priv->lock);
+
+release:
+ afr_dom_lock_release(frame);
+ if (retry)
+ afr_add_lock_to_lkhealq(priv, info);
+ return;
+}
+
+static int
+afr_lock_heal_done(int ret, call_frame_t *frame, void *opaque)
+{
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+static int
+afr_lock_heal(void *opaque)
+{
+ call_frame_t *frame = (call_frame_t *)opaque;
+ call_frame_t *iter_frame = NULL;
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
+ struct list_head healq = {
+ 0,
+ };
+ int ret = 0;
+
+ iter_frame = afr_copy_frame(frame);
+ if (!iter_frame) {
+ return ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&healq);
+ LOCK(&priv->lock);
+ {
+ list_splice_init(&priv->lk_healq, &healq);
+ }
+ UNLOCK(&priv->lock);
+
+ list_for_each_entry_safe(info, tmp, &healq, pos)
+ {
+ GF_ASSERT((AFR_COUNT(info->locked_nodes, priv->child_count) <
+ priv->child_count));
+ ((afr_local_t *)(iter_frame->local))->fd = fd_ref(info->fd);
+ afr_lock_heal_do(iter_frame, priv, info);
+ AFR_STACK_RESET(iter_frame);
+ if (iter_frame->local == NULL) {
+ ret = ENOTCONN;
+ gf_msg(frame->this->name, GF_LOG_ERROR, ENOTCONN,
+ AFR_MSG_LK_HEAL_DOM,
+                   "Aborting processing of lk_healq. "
+ "Healing will be reattempted on next child up for locks "
+ "that are still in quorum.");
+ LOCK(&priv->lock);
+ {
+ list_add_tail(&healq, &priv->lk_healq);
+ }
+ UNLOCK(&priv->lock);
+ break;
+ }
+ }
+
+ AFR_STACK_DESTROY(iter_frame);
+ return ret;
+}
+
+static int
+__afr_lock_heal_synctask(xlator_t *this, afr_private_t *priv, int child)
+{
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
+
+ if (priv->shd.iamshd)
+ return 0;
+
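+    /* Move all saved locks to the heal queue, stamping this child-up's
+     * event generation, then heal them from a synctask. */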
+ list_for_each_entry_safe(info, tmp, &priv->saved_locks, pos)
+ {
+ info->child_up_event_gen[child] = priv->event_generation;
+ list_del_init(&info->pos);
+ list_add_tail(&info->pos, &priv->lk_healq);
+ }
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ return -1;
+
+ ret = synctask_new(this->ctx->env, afr_lock_heal, afr_lock_heal_done, frame,
+ frame);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_LK_HEAL_DOM,
+ "Failed to launch lock heal synctask");
+
+ return ret;
+}
+
+static int
+__afr_mark_pending_lk_heal(xlator_t *this, afr_private_t *priv, int child)
+{
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
+
+ if (priv->shd.iamshd)
+ return 0;
+ list_for_each_entry_safe(info, tmp, &priv->saved_locks, pos)
+ {
+ info->child_down_event_gen[child] = priv->event_generation;
+ if (info->locked_nodes[child] == 1)
+ info->locked_nodes[child] = 0;
+ if (!afr_has_quorum(info->locked_nodes, this, NULL)) {
+ /* Since the lock was lost on quorum no. of nodes, we should
+ * not attempt to heal it anymore. Some other client could have
+ * acquired the lock, modified data and released it and this
+ * client wouldn't know about it if we heal it.*/
+ afr_mark_fd_bad(info->fd, this);
+ list_del(&info->pos);
+ afr_lk_heal_info_cleanup(info);
+ /* We're not winding an unlock on the node where the lock is still
+ * present because when fencing logic switches over to the new
+ * client (since we marked the fd bad), it should preempt any
+ * existing lock. */
+ }
+ }
+ return 0;
+}
+
gf_boolean_t
afr_is_consistent_io_possible(afr_local_t *local, afr_private_t *priv,
int32_t *op_errno)
@@ -70,6 +693,19 @@ afr_is_consistent_io_possible(afr_local_t *local, afr_private_t *priv,
return _gf_true;
}
+gf_boolean_t
+afr_is_lock_mode_mandatory(dict_t *xdata)
+{
+ int ret = 0;
+ uint32_t lk_mode = GF_LK_ADVISORY;
+
+ ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_mode);
+ if (!ret && lk_mode == GF_LK_MANDATORY)
+ return _gf_true;
+
+ return _gf_false;
+}
+
call_frame_t *
afr_copy_frame(call_frame_t *base)
{
@@ -284,7 +920,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
metadatamap |= (1 << index);
}
if (metadatamap_old != metadatamap) {
- event = 0;
+ __afr_inode_need_refresh_set(inode, this);
}
break;
@@ -297,7 +933,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
datamap |= (1 << index);
}
if (datamap_old != datamap)
- event = 0;
+ __afr_inode_need_refresh_set(inode, this);
break;
default:
@@ -461,34 +1097,6 @@ out:
}
int
-__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this)
-{
- int ret = -1;
- uint16_t datamap = 0;
- uint16_t metadatamap = 0;
- uint32_t event = 0;
- uint64_t val = 0;
- afr_inode_ctx_t *ctx = NULL;
-
- ret = __afr_inode_ctx_get(this, inode, &ctx);
- if (ret)
- return ret;
-
- val = ctx->read_subvol;
-
- metadatamap = (val & 0x000000000000ffff) >> 0;
- datamap = (val & 0x00000000ffff0000) >> 16;
- event = 0;
-
- val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
- (((uint64_t)event) << 32);
-
- ctx->read_subvol = val;
-
- return ret;
-}
-
-int
__afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int *event_p)
{
@@ -559,22 +1167,6 @@ out:
}
int
-__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- int ret = -1;
-
- priv = this->private;
-
- if (priv->child_count <= 16)
- ret = __afr_inode_event_gen_reset_small(inode, this);
- else
- ret = -1;
-
- return ret;
-}
-
-int
afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int *event_p)
{
@@ -640,12 +1232,11 @@ afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this,
return 0;
}
-int
+static int
afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
int *spb_choice)
{
int ret = -1;
-
GF_VALIDATE_OR_GOTO(this->name, inode, out);
LOCK(&inode->lock);
@@ -657,6 +1248,40 @@ out:
return ret;
}
+/*
+ * frame is used to get the favourite-child policy. Since
+ * afr_inode_split_brain_choice_get can be called from afr_open, the frame may
+ * not have local->replies populated; in that case frame is passed as NULL,
+ * and this function handles the frame NULL case.
+ */
+int
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol)
+{
+ int ret = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("afr", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, spb_subvol, out);
+
+ priv = this->private;
+
+ ret = afr_inode_split_brain_choice_get(inode, this, spb_subvol);
+ if (*spb_subvol < 0 && priv->fav_child_policy && frame && frame->local) {
+ local = frame->local;
+ *spb_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode,
+ NULL);
+ if (*spb_subvol >= 0) {
+ ret = 0;
+ }
+ }
+
+out:
+ return ret;
+}
int
afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int event)
@@ -723,30 +1348,22 @@ out:
return need_refresh;
}
-static int
-afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
+int
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
{
int ret = -1;
afr_inode_ctx_t *ctx = NULL;
- GF_VALIDATE_OR_GOTO(this->name, inode, out);
-
- LOCK(&inode->lock);
- {
- ret = __afr_inode_ctx_get(this, inode, &ctx);
- if (ret)
- goto unlock;
-
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret == 0) {
ctx->need_refresh = _gf_true;
}
-unlock:
- UNLOCK(&inode->lock);
-out:
+
return ret;
}
int
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
{
int ret = -1;
@@ -754,7 +1371,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
LOCK(&inode->lock);
{
- ret = __afr_inode_event_gen_reset(inode, this);
+ ret = __afr_inode_need_refresh_set(inode, this);
}
UNLOCK(&inode->lock);
out:
@@ -820,7 +1437,6 @@ afr_set_split_brain_choice(int ret, call_frame_t *frame, void *opaque)
gf_boolean_t timer_set = _gf_false;
gf_boolean_t timer_cancelled = _gf_false;
gf_boolean_t timer_reset = _gf_false;
- gf_boolean_t need_invalidate = _gf_true;
int old_spb_choice = -1;
frame = data->frame;
@@ -932,7 +1548,6 @@ afr_set_split_brain_choice(int ret, call_frame_t *frame, void *opaque)
timer_set = _gf_true;
if (timer_reset && !ctx->timer)
timer_cancelled = _gf_true;
- need_invalidate = _gf_false;
}
unlock:
UNLOCK(&inode->lock);
@@ -946,8 +1561,7 @@ post_unlock:
* reads from an older cached value despite a change in spb_choice to
* a new value.
*/
- if (need_invalidate)
- inode_invalidate(inode);
+ inode_invalidate(inode);
out:
GF_FREE(data);
AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
@@ -1054,6 +1668,8 @@ afr_readables_fill(call_frame_t *frame, xlator_t *this, inode_t *inode,
ia_type = inode->ia_type;
}
+ if (!xdata)
+ continue; /* mkdir_cbk sends NULL xdata_rsp. */
afr_accused_fill(this, xdata, data_accused,
(ia_type == IA_IFDIR) ? AFR_ENTRY_TRANSACTION
: AFR_DATA_TRANSACTION);
@@ -1175,7 +1791,6 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
inode_t *inode = NULL;
int event_generation = 0;
int read_subvol = -1;
- int op_errno = ENOMEM;
int ret = 0;
local = frame->local;
@@ -1191,7 +1806,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
ret = afr_inode_get_readable(frame, inode, this, local->readable,
&event_generation, local->transaction.type);
- if (ret == -EIO || (local->is_read_txn && !event_generation)) {
+ if (ret == -EIO) {
/* No readable subvolume even after refresh ==> splitbrain.*/
if (!priv->fav_child_policy) {
err = EIO;
@@ -1204,18 +1819,12 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
goto refresh_done;
}
- heal_frame = copy_frame(frame);
+ heal_frame = afr_frame_create(this, NULL);
if (!heal_frame) {
err = EIO;
goto refresh_done;
}
- heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
- heal_local = AFR_FRAME_INIT(heal_frame, op_errno);
- if (!heal_local) {
- err = EIO;
- AFR_STACK_DESTROY(heal_frame);
- goto refresh_done;
- }
+ heal_local = heal_frame->local;
heal_local->xdata_req = dict_new();
if (!heal_local->xdata_req) {
err = EIO;
@@ -1236,18 +1845,6 @@ refresh_done:
return 0;
}
-static void
-afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
- unsigned char *replies)
-{
- int i = 0;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->replies[i].valid && local->replies[i].op_ret == 0)
- replies[i] = 1;
- }
-}
-
int
afr_inode_refresh_done(call_frame_t *frame, xlator_t *this, int error)
{
@@ -1257,7 +1854,6 @@ afr_inode_refresh_done(call_frame_t *frame, xlator_t *this, int error)
gf_boolean_t start_heal = _gf_false;
afr_local_t *heal_local = NULL;
unsigned char *success_replies = NULL;
- int op_errno = ENOMEM;
int ret = 0;
if (error != 0) {
@@ -1291,15 +1887,10 @@ afr_inode_refresh_done(call_frame_t *frame, xlator_t *this, int error)
ret = afr_replies_interpret(frame, this, local->refreshinode, &start_heal);
if (ret && afr_selfheal_enabled(this) && start_heal) {
- heal_frame = copy_frame(frame);
+ heal_frame = afr_frame_create(this, NULL);
if (!heal_frame)
goto refresh_done;
- heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
- heal_local = AFR_FRAME_INIT(heal_frame, op_errno);
- if (!heal_local) {
- AFR_STACK_DESTROY(heal_frame);
- goto refresh_done;
- }
+ heal_local = heal_frame->local;
heal_local->refreshinode = inode_ref(local->refreshinode);
heal_local->heal_frame = heal_frame;
if (!afr_throttled_selfheal(heal_frame, this)) {
@@ -1336,17 +1927,22 @@ afr_inode_refresh_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (xdata)
local->replies[call_child].xdata = dict_ref(xdata);
}
+
if (xdata) {
ret = dict_get_int8(xdata, "link-count", &need_heal);
- local->replies[call_child].need_heal = need_heal;
- } else {
- local->replies[call_child].need_heal = need_heal;
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to get link count");
+ }
}
+ local->replies[call_child].need_heal = need_heal;
call_count = afr_frame_return(frame);
if (call_count == 0) {
afr_set_need_heal(this, local);
ret = afr_inode_refresh_err(frame, this);
+ if (ret) {
+ gf_msg_debug(this->name, ret, "afr_inode_refresh_err failed");
+ }
afr_inode_refresh_done(frame, this, ret);
}
}
@@ -1615,19 +2211,18 @@ out:
}
int
-afr_least_pending_reads_child(afr_private_t *priv)
+afr_least_pending_reads_child(afr_private_t *priv, unsigned char *readable)
{
int i = 0;
- int child = 0;
+ int child = -1;
int64_t read_iter = -1;
int64_t pending_read = -1;
- pending_read = GF_ATOMIC_GET(priv->pending_reads[0]);
- for (i = 1; i < priv->child_count; i++) {
- if (AFR_IS_ARBITER_BRICK(priv, i))
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i])
continue;
read_iter = GF_ATOMIC_GET(priv->pending_reads[i]);
- if (read_iter < pending_read) {
+ if (child == -1 || read_iter < pending_read) {
pending_read = read_iter;
child = i;
}
@@ -1636,8 +2231,54 @@ afr_least_pending_reads_child(afr_private_t *priv)
return child;
}
+static int32_t
+afr_least_latency_child(afr_private_t *priv, unsigned char *readable)
+{
+ int32_t i = 0;
+ int child = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i] ||
+ priv->child_latency[i] < 0)
+ continue;
+
+ if (child == -1 ||
+ priv->child_latency[i] < priv->child_latency[child]) {
+ child = i;
+ }
+ }
+ return child;
+}
+
+static int32_t
+afr_least_latency_times_pending_reads_child(afr_private_t *priv,
+ unsigned char *readable)
+{
+ int32_t i = 0;
+ int child = -1;
+ int64_t pending_read = 0;
+ int64_t latency = -1;
+ int64_t least_latency = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i] ||
+ priv->child_latency[i] < 0)
+ continue;
+
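+        /* Estimated wait is (queued reads + 1) * per-child latency; the +1
+         * keeps an idle child's base latency in the comparison. */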
+ pending_read = GF_ATOMIC_GET(priv->pending_reads[i]);
+ latency = (pending_read + 1) * priv->child_latency[i];
+
+ if (child == -1 || latency < least_latency) {
+ least_latency = latency;
+ child = i;
+ }
+ }
+ return child;
+}
+
int
-afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv)
+afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv,
+ unsigned char *readable)
{
uuid_t gfid_copy = {
0,
@@ -1646,14 +2287,14 @@ afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv)
int child = -1;
switch (priv->hash_mode) {
- case 0:
+ case AFR_READ_POLICY_FIRST_UP:
break;
- case 1:
+ case AFR_READ_POLICY_GFID_HASH:
gf_uuid_copy(gfid_copy, args->gfid);
child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) %
priv->child_count;
break;
- case 2:
+ case AFR_READ_POLICY_GFID_PID_HASH:
if (args->ia_type != IA_IFDIR) {
/*
* Why getpid? Because it's one of the cheapest calls
@@ -1665,14 +2306,21 @@ afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv)
* need is a low probability that multiple clients
* won't converge on the same subvolume.
*/
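+            /* Hash over both the gfid and the pid so reads from different
+             * client processes spread across children while remaining
+             * stable per file within a process. */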
+ gf_uuid_copy(gfid_copy, args->gfid);
pid = getpid();
- memcpy(gfid_copy, &pid, sizeof(pid));
+ *(pid_t *)gfid_copy ^= pid;
}
child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) %
priv->child_count;
break;
- case 3:
- child = afr_least_pending_reads_child(priv);
+ case AFR_READ_POLICY_LESS_LOAD:
+ child = afr_least_pending_reads_child(priv, readable);
+ break;
+ case AFR_READ_POLICY_LEAST_LATENCY:
+ child = afr_least_latency_child(priv, readable);
+ break;
+ case AFR_READ_POLICY_LOAD_LATENCY_HYBRID:
+ child = afr_least_latency_times_pending_reads_child(priv, readable);
break;
}
@@ -1705,7 +2353,7 @@ afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
}
/* second preference - use hashed mode */
- read_subvol = afr_hash_child(&local_args, priv);
+ read_subvol = afr_hash_child(&local_args, priv, readable);
if (read_subvol >= 0 && readable[read_subvol])
return read_subvol;
@@ -2011,6 +2659,9 @@ afr_local_cleanup(afr_local_t *local, xlator_t *this)
{ /* lk */
GF_FREE(local->cont.lk.locked_nodes);
+ GF_FREE(local->cont.lk.dom_locked_nodes);
+ GF_FREE(local->cont.lk.dom_lock_op_ret);
+ GF_FREE(local->cont.lk.dom_lock_op_errno);
}
{ /* create */
@@ -2241,7 +2892,7 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int spb_choice = -1;
+ int spb_subvol = -1;
int child_count = -1;
if (*read_subvol != -1)
@@ -2251,11 +2902,12 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
local = frame->local;
child_count = priv->child_count;
- afr_inode_split_brain_choice_get(local->inode, this, &spb_choice);
- if ((spb_choice >= 0) &&
+ afr_split_brain_read_subvol_get(local->inode, this, frame, &spb_subvol);
+ if ((spb_subvol >= 0) &&
(AFR_COUNT(success_replies, child_count) == child_count)) {
- *read_subvol = spb_choice;
- } else if (!priv->quorum_count) {
+ *read_subvol = spb_subvol;
+ } else if (!priv->quorum_count ||
+ frame->root->pid == GF_CLIENT_PID_GLFS_HEAL) {
*read_subvol = afr_first_up_child(frame, this);
} else if (priv->quorum_count &&
afr_has_quorum(data_readable, this, NULL)) {
@@ -2294,6 +2946,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
0,
};
gf_boolean_t locked_entry = _gf_false;
+ gf_boolean_t in_flight_create = _gf_false;
gf_boolean_t can_interpret = _gf_true;
inode_t *parent = NULL;
ia_type_t ia_type = IA_INVAL;
@@ -2337,17 +2990,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
if (!replies[i].valid)
continue;
- if (locked_entry && replies[i].op_ret == -1 &&
- replies[i].op_errno == ENOENT) {
- /* Second, check entry is still
- "underway" in creation */
- local->op_ret = -1;
- local->op_errno = ENOENT;
- goto error;
- }
-
- if (replies[i].op_ret == -1)
+ if (replies[i].op_ret == -1) {
+ if (locked_entry && replies[i].op_errno == ENOENT) {
+ in_flight_create = _gf_true;
+ }
continue;
+ }
if (read_subvol == -1 || !readable[read_subvol]) {
read_subvol = i;
@@ -2357,6 +3005,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
}
}
+ if (in_flight_create && !afr_has_quorum(success_replies, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ goto error;
+ }
+
if (read_subvol == -1)
goto error;
/* We now have a read_subvol, which is readable[] (if there
@@ -2415,7 +3069,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
if (read_subvol == -1)
goto cant_interpret;
if (ret) {
- afr_inode_event_gen_reset(local->inode, this);
+ afr_inode_need_refresh_set(local->inode, this);
dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
}
} else {
@@ -2468,7 +3122,7 @@ error:
* others in that they must be given higher priority while
* returning to the user.
*
- * The hierarchy is ENODATA > ENOENT > ESTALE > others
+ * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC > others
*/
int
@@ -2480,6 +3134,8 @@ afr_higher_errno(int32_t old_errno, int32_t new_errno)
return ENOENT;
if (old_errno == ESTALE || new_errno == ESTALE)
return ESTALE;
+ if (old_errno == ENOSPC || new_errno == ENOSPC)
+ return ENOSPC;
return new_errno;
}
@@ -2971,6 +3627,7 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int read_subvol = -1;
+ int ret = 0;
unsigned char *data_readable = NULL;
unsigned char *success_replies = NULL;
@@ -2992,7 +3649,10 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
if (!afr_has_quorum(success_replies, this, frame))
goto unwind;
- afr_replies_interpret(frame, this, local->inode, NULL);
+ ret = afr_replies_interpret(frame, this, local->inode, NULL);
+ if (ret) {
+ afr_inode_need_refresh_set(local->inode, this);
+ }
read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
data_readable);
@@ -3044,7 +3704,7 @@ afr_ta_id_file_check(void *opaque)
this = opaque;
priv = this->private;
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_false);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate thin-arbiter loc for: %s.", loc.name);
@@ -3235,10 +3895,15 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
local->inode = inode_ref(loc->inode);
- if (xattr_req)
+ if (xattr_req) {
/* If xattr_req was null, afr_lookup_xattr_req_prepare() will
allocate one for us */
- local->xattr_req = dict_ref(xattr_req);
+ local->xattr_req = dict_copy_with_ref(xattr_req, NULL);
+ if (!local->xattr_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
if (gf_uuid_is_null(loc->inode->gfid)) {
afr_discover_do(frame, this, 0);
@@ -3248,11 +3913,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
AFR_DATA_TRANSACTION, NULL);
- if (afr_is_inode_refresh_reqd(loc->inode, this, event,
- local->event_generation))
- afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do);
- else
- afr_discover_do(frame, this, 0);
+ afr_discover_do(frame, this, 0);
return 0;
out:
@@ -3353,11 +4014,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
return 0;
}
- if (__is_root_gfid(loc->parent->gfid)) {
- if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) {
- op_errno = EPERM;
- goto out;
- }
+ if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name,
+ frame->root->pid)) {
+ op_errno = EPERM;
+ goto out;
}
local = AFR_FRAME_INIT(frame, op_errno);
@@ -3393,11 +4053,7 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
AFR_DATA_TRANSACTION, NULL);
- if (afr_is_inode_refresh_reqd(loc->inode, this, event,
- local->event_generation))
- afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do);
- else
- afr_lookup_do(frame, this, 0);
+ afr_lookup_do(frame, this, 0);
return 0;
out:
@@ -3407,8 +4063,18 @@ out:
}
void
-_afr_cleanup_fd_ctx(afr_fd_ctx_t *fd_ctx)
+_afr_cleanup_fd_ctx(xlator_t *this, afr_fd_ctx_t *fd_ctx)
{
+ afr_private_t *priv = this->private;
+
+ if (fd_ctx->lk_heal_info) {
+ LOCK(&priv->lock);
+ {
+ list_del(&fd_ctx->lk_heal_info->pos);
+ }
+ afr_lk_heal_info_cleanup(fd_ctx->lk_heal_info);
+ fd_ctx->lk_heal_info = NULL;
+ }
GF_FREE(fd_ctx->opened_on);
GF_FREE(fd_ctx);
return;
@@ -3428,7 +4094,7 @@ afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd)
fd_ctx = (afr_fd_ctx_t *)(long)ctx;
if (fd_ctx) {
- _afr_cleanup_fd_ctx(fd_ctx);
+ _afr_cleanup_fd_ctx(this, fd_ctx);
}
out:
@@ -3521,13 +4187,14 @@ __afr_fd_ctx_set(xlator_t *this, fd_t *fd)
}
fd_ctx->readdir_subvol = -1;
+ fd_ctx->lk_heal_info = NULL;
ret = __fd_ctx_set(fd, this, (uint64_t)(long)fd_ctx);
if (ret)
gf_msg_debug(this->name, 0, "failed to set fd ctx (%p)", fd);
out:
if (ret && fd_ctx)
- _afr_cleanup_fd_ctx(fd_ctx);
+ _afr_cleanup_fd_ctx(this, fd_ctx);
return ret;
}
@@ -3551,11 +4218,10 @@ afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
} else {
local->op_errno = op_errno;
}
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
if (call_count == 0)
AFR_STACK_UNWIND(flush, frame, local->op_ret, local->op_errno,
local->xdata_rsp);
@@ -3651,6 +4317,7 @@ afr_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
call_stub_t *stub = NULL;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
@@ -3691,11 +4358,10 @@ afr_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
} else {
local->op_errno = op_errno;
}
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
if (call_count == 0)
AFR_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno,
local->xdata_rsp);
@@ -4188,9 +4854,9 @@ out:
}
static int32_t
-afr_handle_inodelk(call_frame_t *frame, glusterfs_fop_t fop, const char *volume,
- loc_t *loc, fd_t *fd, int32_t cmd, struct gf_flock *flock,
- dict_t *xdata)
+afr_handle_inodelk(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ const char *volume, loc_t *loc, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
afr_local_t *local = NULL;
int32_t op_errno = ENOMEM;
@@ -4202,8 +4868,10 @@ afr_handle_inodelk(call_frame_t *frame, glusterfs_fop_t fop, const char *volume,
local->op = fop;
if (loc)
loc_copy(&local->loc, loc);
- if (fd)
+ if (fd && (flock->l_type != F_UNLCK)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local->fd = fd_ref(fd);
+ }
local->cont.inodelk.volume = gf_strdup(volume);
if (!local->cont.inodelk.volume) {
@@ -4232,8 +4900,8 @@ int32_t
afr_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- afr_handle_inodelk(frame, GF_FOP_INODELK, volume, loc, NULL, cmd, flock,
- xdata);
+ afr_handle_inodelk(frame, this, GF_FOP_INODELK, volume, loc, NULL, cmd,
+ flock, xdata);
return 0;
}
@@ -4241,15 +4909,16 @@ int32_t
afr_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- afr_handle_inodelk(frame, GF_FOP_FINODELK, volume, NULL, fd, cmd, flock,
- xdata);
+ afr_handle_inodelk(frame, this, GF_FOP_FINODELK, volume, NULL, fd, cmd,
+ flock, xdata);
return 0;
}
static int
-afr_handle_entrylk(call_frame_t *frame, glusterfs_fop_t fop, const char *volume,
- loc_t *loc, fd_t *fd, const char *basename, entrylk_cmd cmd,
- entrylk_type type, dict_t *xdata)
+afr_handle_entrylk(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ const char *volume, loc_t *loc, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
int32_t op_errno = ENOMEM;
@@ -4261,8 +4930,10 @@ afr_handle_entrylk(call_frame_t *frame, glusterfs_fop_t fop, const char *volume,
local->op = fop;
if (loc)
loc_copy(&local->loc, loc);
- if (fd)
+ if (fd && (cmd != ENTRYLK_UNLOCK)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local->fd = fd_ref(fd);
+ }
local->cont.entrylk.cmd = cmd;
local->cont.entrylk.in_cmd = cmd;
local->cont.entrylk.type = type;
@@ -4289,8 +4960,8 @@ afr_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
const char *basename, entrylk_cmd cmd, entrylk_type type,
dict_t *xdata)
{
- afr_handle_entrylk(frame, GF_FOP_ENTRYLK, volume, loc, NULL, basename, cmd,
- type, xdata);
+ afr_handle_entrylk(frame, this, GF_FOP_ENTRYLK, volume, loc, NULL, basename,
+ cmd, type, xdata);
return 0;
}
@@ -4299,8 +4970,8 @@ afr_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
const char *basename, entrylk_cmd cmd, entrylk_type type,
dict_t *xdata)
{
- afr_handle_entrylk(frame, GF_FOP_FENTRYLK, volume, NULL, fd, basename, cmd,
- type, xdata);
+ afr_handle_entrylk(frame, this, GF_FOP_FENTRYLK, volume, NULL, fd, basename,
+ cmd, type, xdata);
return 0;
}
@@ -4312,10 +4983,10 @@ afr_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int call_count = 0;
struct statvfs *buf = NULL;
+ local = frame->local;
+
LOCK(&frame->lock);
{
- local = frame->local;
-
if (op_ret != 0) {
local->op_errno = op_errno;
goto unlock;
@@ -4341,10 +5012,9 @@ afr_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
}
}
unlock:
+ call_count = --local->call_count;
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
if (call_count == 0)
AFR_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
&local->cont.statfs.buf, local->xdata_rsp);
@@ -4419,9 +5089,10 @@ afr_lk_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
call_count = afr_frame_return(frame);
- if (call_count == 0)
+ if (call_count == 0) {
AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno, NULL,
local->xdata_rsp);
+ }
return 0;
}
@@ -4520,11 +5191,133 @@ afr_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
}
int
+afr_lk_transaction_cbk(int ret, call_frame_t *frame, void *opaque)
+{
+ return 0;
+}
+
+int
+afr_lk_txn_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = -1;
+
+ local = frame->local;
+ child_index = (long)cookie;
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lk.locked_nodes[child_index] = 1;
+ local->cont.lk.ret_flock = *lock;
+ }
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+int
+afr_lk_txn_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int child_index = (long)cookie;
+
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL,
+ "gfid=%s: unlock failed on subvolume %s "
+ "with lock owner %s",
+ uuid_utoa(local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ lkowner_utoa(&frame->root->lk_owner));
+ }
+ return 0;
+}
+
+int
+afr_lk_transaction(void *opaque)
+{
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ char *wind_on = NULL;
+ int op_errno = 0;
+ int i = 0;
+ int ret = 0;
+
+ frame = (call_frame_t *)opaque;
+ local = frame->local;
+ this = frame->this;
+ priv = this->private;
+ wind_on = alloca0(priv->child_count);
+
+ if (priv->arbiter_count || priv->child_count != 3) {
+ op_errno = ENOTSUP;
+ gf_msg(frame->this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Lock healing supported only for replica 3 volumes.",
+ uuid_utoa(local->fd->inode->gfid));
+ goto err;
+ }
+
+ op_errno = -afr_dom_lock_acquire(frame); /* released during AFR_STACK_UNWIND */
+ if (op_errno != 0) {
+ goto err;
+ }
+ if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.dom_locked_nodes, this, NULL)) {
+ op_errno = afr_final_errno(local, priv);
+ goto err;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->child_up[i] && local->cont.lk.dom_locked_nodes[i])
+ wind_on[i] = 1;
+ }
+ AFR_ONLIST(wind_on, frame, afr_lk_txn_wind_cbk, lk, local->fd,
+ local->cont.lk.cmd, &local->cont.lk.user_flock,
+ local->xdata_req);
+
+ if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.locked_nodes, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ goto unlock;
+ } else {
+ if (local->cont.lk.user_flock.l_type == F_UNLCK)
+ ret = afr_remove_lock_from_saved_locks(local, this);
+ else
+ ret = afr_add_lock_to_saved_locks(frame, this);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
+ }
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.ret_flock, local->xdata_rsp);
+ }
+
+ return 0;
+
+unlock:
+ local->cont.lk.user_flock.l_type = F_UNLCK;
+ AFR_ONLIST(local->cont.lk.locked_nodes, frame, afr_lk_txn_unlock_cbk, lk,
+ local->fd, F_SETLK, &local->cont.lk.user_flock, NULL);
+err:
+ AFR_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
+ return -1;
+}
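/* A minimal, self-contained sketch of the quorum check pattern that
 * afr_lk_transaction() applies to dom_locked_nodes/locked_nodes above.
 * The real afr_has_quorum() also handles auto-quorum and arbiter
 * weighting; this illustrates only the simple quorum_count case, and
 * the names below are illustrative stand-ins, not AFR symbols. */
#include <stdio.h>

static int
sketch_has_quorum(const unsigned char *up, int child_count, int quorum_count)
{
    int i, count = 0;

    for (i = 0; i < child_count; i++)
        if (up[i])
            count++;
    return count >= quorum_count;
}

int
main(void)
{
    unsigned char locked_nodes[3] = {1, 1, 0}; /* locks held on 2 of 3 */

    printf("quorum met: %d\n", sketch_has_quorum(locked_nodes, 3, 2));
    return 0;
}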
+
+int
afr_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
+ int ret = 0;
int i = 0;
int32_t op_errno = ENOMEM;
@@ -4535,9 +5328,11 @@ afr_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
goto out;
local->op = GF_FOP_LK;
- if (!afr_lk_is_unlock(cmd, flock) &&
- !afr_is_consistent_io_possible(local, priv, &op_errno))
- goto out;
+ if (!afr_lk_is_unlock(cmd, flock)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
+ }
local->cont.lk.locked_nodes = GF_CALLOC(
priv->child_count, sizeof(*local->cont.lk.locked_nodes),
@@ -4555,6 +5350,16 @@ afr_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
if (xdata)
local->xdata_req = dict_ref(xdata);
+ if (afr_is_lock_mode_mandatory(xdata)) {
+ ret = synctask_new(this->ctx->env, afr_lk_transaction,
+ afr_lk_transaction_cbk, frame, frame);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ return 0;
+ }
+
STACK_WIND_COOKIE(frame, afr_lk_cbk, (void *)(long)0, priv->children[i],
priv->children[i]->fops->lk, fd, cmd, flock,
local->xdata_req);
@@ -4876,6 +5681,8 @@ afr_priv_dump(xlator_t *this)
GF_ATOMIC_GET(priv->pending_reads[i]));
sprintf(key, "child_latency[%d]", i);
gf_proc_dump_write(key, "%" PRId64, priv->child_latency[i]);
+ sprintf(key, "halo_child_up[%d]", i);
+ gf_proc_dump_write(key, "%d", priv->halo_child_up[i]);
}
gf_proc_dump_write("data_self_heal", "%d", priv->data_self_heal);
gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
@@ -4888,6 +5695,7 @@ afr_priv_dump(xlator_t *this)
priv->background_self_heal_count);
gf_proc_dump_write("healers", "%d", priv->healers);
gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode);
+ gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode);
if (priv->quorum_count == AFR_QUORUM_AUTO) {
gf_proc_dump_write("quorum-type", "auto");
} else if (priv->quorum_count == 0) {
@@ -4948,13 +5756,31 @@ __afr_get_up_children_count(afr_private_t *priv)
return up_children;
}
+static int
+__get_heard_from_all_status(xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ int i;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i]) {
+ return 0;
+ }
+ }
+ if (priv->thin_arbiter_count && !priv->ta_child_up) {
+ return 0;
+ }
+ return 1;
+}
+
glusterfs_event_t
-__afr_transform_event_from_state(afr_private_t *priv)
+__afr_transform_event_from_state(xlator_t *this)
{
int i = 0;
int up_children = 0;
+ afr_private_t *priv = this->private;
- if (AFR_COUNT(priv->last_event, priv->child_count) == priv->child_count)
+ if (__get_heard_from_all_status(this))
/* have_heard_from_all. Let afr_notify() do the propagation. */
return GF_EVENT_MAXVAL;
@@ -4996,7 +5822,7 @@ afr_notify_cbk(void *data)
goto unlock;
}
priv->timer = NULL;
- event = __afr_transform_event_from_state(priv);
+ event = __afr_transform_event_from_state(this);
if (event != GF_EVENT_MAXVAL)
propagate = _gf_true;
}
@@ -5023,22 +5849,6 @@ __afr_launch_notify_timer(xlator_t *this, afr_private_t *priv)
}
}
-int
-__get_heard_from_all_status(xlator_t *this)
-{
- afr_private_t *priv = this->private;
- int heard_from_all = 1;
- int i = 0;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->last_event[i]) {
- heard_from_all = 0;
- break;
- }
- }
- return heard_from_all;
-}
-
static int
find_best_down_child(xlator_t *this)
{
@@ -5050,7 +5860,7 @@ find_best_down_child(xlator_t *this)
priv = this->private;
for (i = 0; i < priv->child_count; i++) {
- if (priv->child_up[i] && priv->child_latency[i] >= 0 &&
+ if (!priv->child_up[i] && priv->child_latency[i] >= 0 &&
priv->child_latency[i] < best_latency) {
best_child = i;
best_latency = priv->child_latency[i];
@@ -5122,7 +5932,9 @@ __afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx,
"), "
"marking child down.",
child_latency_msec, halo_max_latency_msec);
- *event = GF_EVENT_CHILD_DOWN;
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_DOWN;
+ }
}
} else if (child_latency_msec < halo_max_latency_msec &&
priv->child_up[idx] == 0) {
@@ -5134,7 +5946,9 @@ __afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx,
"), "
"marking child up.",
child_latency_msec, halo_max_latency_msec);
- *event = GF_EVENT_CHILD_UP;
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_UP;
+ }
} else {
gf_log(child_xlator->name, GF_LOG_INFO,
"Not marking child %d up, "
@@ -5196,9 +6010,15 @@ __afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator,
* want to set the child_latency to MAX to indicate
* the child needs ping data to be available before doing child-up
*/
- if (child_latency_msec < 0 && priv->halo_enabled) {
+ if (!priv->halo_enabled)
+ goto out;
+
+ if (child_latency_msec < 0) {
/*set to INT64_MAX-1 so that it is found for best_down_child*/
- priv->child_latency[idx] = AFR_HALO_MAX_LATENCY;
+ priv->halo_child_up[idx] = 1;
+ if (priv->child_latency[idx] < 0) {
+ priv->child_latency[idx] = AFR_HALO_MAX_LATENCY;
+ }
}
/*
@@ -5236,7 +6056,7 @@ __afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator,
"up_children (%d) > halo_max_replicas (%d)",
worst_up_child, up_children, priv->halo_max_replicas);
}
-
+out:
if (up_children == 1) {
gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOL_UP,
"Subvolume '%s' came back up; "
@@ -5287,6 +6107,7 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
*/
if (child_latency_msec < 0) {
priv->child_latency[idx] = child_latency_msec;
+ priv->halo_child_up[idx] = 0;
}
priv->child_up[idx] = 0;
@@ -5299,7 +6120,7 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
* as we want it to be up to date if we are going to
* begin using it synchronously.
*/
- if (up_children < priv->halo_min_replicas) {
+ if (priv->halo_enabled && up_children < priv->halo_min_replicas) {
best_down_child = find_best_down_child(this);
if (best_down_child >= 0) {
gf_msg_debug(this->name, 0,
@@ -5311,7 +6132,6 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
*up_child = best_down_child;
}
}
-
for (i = 0; i < priv->child_count; i++)
if (priv->child_up[i] == 0)
down_children++;
@@ -5483,13 +6303,13 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
had_quorum = priv->quorum_count &&
afr_has_quorum(priv->child_up, this, NULL);
- if (priv->halo_enabled) {
- halo_max_latency_msec = afr_get_halo_latency(this);
+ if (event == GF_EVENT_CHILD_PING) {
+ child_latency_msec = (int64_t)(uintptr_t)data2;
+ if (priv->halo_enabled) {
+ halo_max_latency_msec = afr_get_halo_latency(this);
- if (event == GF_EVENT_CHILD_PING) {
/* Calculates the child latency and sets event
*/
- child_latency_msec = (int64_t)(uintptr_t)data2;
LOCK(&priv->lock);
{
__afr_handle_ping_event(this, child_xlator, idx,
@@ -5497,6 +6317,12 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
child_latency_msec);
}
UNLOCK(&priv->lock);
+ } else {
+ LOCK(&priv->lock);
+ {
+ priv->child_latency[idx] = child_latency_msec;
+ }
+ UNLOCK(&priv->lock);
}
}
@@ -5546,6 +6372,7 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
__afr_handle_child_up_event(this, child_xlator, idx,
child_latency_msec, &event,
&call_psh, &up_child);
+ __afr_lock_heal_synctask(this, priv, idx);
break;
case GF_EVENT_CHILD_DOWN:
@@ -5559,6 +6386,7 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
__afr_handle_child_down_event(this, child_xlator, idx,
child_latency_msec, &event,
&call_psh, &up_child);
+ __afr_mark_pending_lk_heal(this, priv, idx);
break;
case GF_EVENT_CHILD_CONNECTING:
@@ -5652,7 +6480,7 @@ afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
goto out;
}
- local->child_up = GF_CALLOC(priv->child_count, sizeof(*local->child_up),
+ local->child_up = GF_MALLOC(priv->child_count * sizeof(*local->child_up),
gf_afr_mt_char);
if (!local->child_up) {
if (op_errno)
@@ -5844,6 +6672,8 @@ afr_priv_destroy(afr_private_t *priv)
if (!priv)
goto out;
+
+ GF_FREE(priv->sh_domain);
GF_FREE(priv->last_event);
child_count = priv->child_count;
@@ -5859,7 +6689,9 @@ afr_priv_destroy(afr_private_t *priv)
GF_FREE(priv->local);
GF_FREE(priv->pending_key);
GF_FREE(priv->children);
+ GF_FREE(priv->anon_inode);
GF_FREE(priv->child_up);
+ GF_FREE(priv->halo_child_up);
GF_FREE(priv->child_latency);
LOCK_DESTROY(&priv->lock);
@@ -5910,274 +6742,218 @@ out:
return changelog;
}
-gf_boolean_t
-afr_decide_heal_info(afr_private_t *priv, unsigned char *sources, int source)
+static dict_t *
+afr_set_heal_info(char *status)
{
- int sources_count = 0;
+ dict_t *dict = NULL;
+ int ret = -1;
- if (source < 0)
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
goto out;
+ }
- sources_count = AFR_COUNT(sources, priv->child_count);
- if (sources_count == priv->child_count)
- return _gf_false;
+ ret = dict_set_dynstr_sizen(dict, "heal-info", status);
+ if (ret)
+ gf_msg("", GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Failed to set heal-info key to "
+ "%s",
+ status);
out:
- return _gf_true;
+ /* If dict_new() failed (ENOMEM) or dict_set returned EINVAL, status
+ * was not consumed and must be freed here; on any other error
+ * dict_set_dynstr frees status itself. */
+ if (ret == -ENOMEM || ret == -EINVAL) {
+ GF_FREE(status);
+ }
+
+ if (ret && dict) {
+ dict_unref(dict);
+ dict = NULL;
+ }
+ return dict;
}
-int
-afr_selfheal_locked_metadata_inspect(call_frame_t *frame, xlator_t *this,
- inode_t *inode, gf_boolean_t *msh,
- unsigned char *pending)
+static gf_boolean_t
+afr_is_dirty_count_non_unary_for_txn(xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type)
{
- int ret = -1;
- unsigned char *locked_on = NULL;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- unsigned char *healed_sinks = NULL;
- unsigned char *undid_pending = NULL;
- struct afr_reply *locked_replies = NULL;
-
afr_private_t *priv = this->private;
+ int *dirty = alloca0(priv->child_count * sizeof(int));
+ int i = 0;
- locked_on = alloca0(priv->child_count);
- sources = alloca0(priv->child_count);
- sinks = alloca0(priv->child_count);
- healed_sinks = alloca0(priv->child_count);
- undid_pending = alloca0(priv->child_count);
-
- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
-
- ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
- locked_on);
- {
- if (ret == 0) {
- /* Not a single lock */
- ret = -afr_final_errno(frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN; /* all invalid responses */
- goto out;
- }
- ret = __afr_selfheal_metadata_prepare(
- frame, this, inode, locked_on, sources, sinks, healed_sinks,
- undid_pending, locked_replies, pending);
- *msh = afr_decide_heal_info(priv, sources, ret);
+ afr_selfheal_extract_xattr(this, replies, type, dirty, NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (dirty[i] > 1)
+ return _gf_true;
}
- afr_selfheal_uninodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
- locked_on);
-out:
- if (locked_replies)
- afr_replies_wipe(locked_replies, priv->child_count);
- return ret;
+
+ return _gf_false;
}
-int
-afr_selfheal_locked_data_inspect(call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_boolean_t *dsh, unsigned char *pflag)
+static gf_boolean_t
+afr_is_dirty_count_non_unary(xlator_t *this, struct afr_reply *replies,
+ ia_type_t ia_type)
{
- int ret = -1;
- unsigned char *data_lock = NULL;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- unsigned char *healed_sinks = NULL;
- unsigned char *undid_pending = NULL;
- afr_private_t *priv = NULL;
- struct afr_reply *locked_replies = NULL;
- inode_t *inode = fd->inode;
+ gf_boolean_t data_chk = _gf_false;
+ gf_boolean_t mdata_chk = _gf_false;
+ gf_boolean_t entry_chk = _gf_false;
- priv = this->private;
- data_lock = alloca0(priv->child_count);
- sources = alloca0(priv->child_count);
- sinks = alloca0(priv->child_count);
- healed_sinks = alloca0(priv->child_count);
- undid_pending = alloca0(priv->child_count);
-
- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
+ switch (ia_type) {
+ case IA_IFDIR:
+ mdata_chk = _gf_true;
+ entry_chk = _gf_true;
+ break;
+ case IA_IFREG:
+ mdata_chk = _gf_true;
+ data_chk = _gf_true;
+ break;
+ default:
+ /*IA_IFBLK, IA_IFCHR, IA_IFLNK, IA_IFIFO, IA_IFSOCK*/
+ mdata_chk = _gf_true;
+ break;
+ }
- ret = afr_selfheal_inodelk(frame, this, inode, this->name, 0, 0, data_lock);
- {
- if (ret == 0) {
- ret = -afr_final_errno(frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN; /* all invalid responses */
- goto out;
- }
- ret = __afr_selfheal_data_prepare(frame, this, inode, data_lock,
- sources, sinks, healed_sinks,
- undid_pending, locked_replies, pflag);
- *dsh = afr_decide_heal_info(priv, sources, ret);
+ if (data_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_DATA_TRANSACTION)) {
+ return _gf_true;
+ } else if (mdata_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_METADATA_TRANSACTION)) {
+ return _gf_true;
+ } else if (entry_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_ENTRY_TRANSACTION)) {
+ return _gf_true;
}
- afr_selfheal_uninodelk(frame, this, inode, this->name, 0, 0, data_lock);
-out:
- if (locked_replies)
- afr_replies_wipe(locked_replies, priv->child_count);
- return ret;
+
+ return _gf_false;
}
-int
-afr_selfheal_locked_entry_inspect(call_frame_t *frame, xlator_t *this,
- inode_t *inode, gf_boolean_t *esh,
- unsigned char *pflag)
+static int
+afr_update_heal_status(xlator_t *this, struct afr_reply *replies,
+ ia_type_t ia_type, gf_boolean_t *esh, gf_boolean_t *dsh,
+ gf_boolean_t *msh, unsigned char pending)
{
int ret = -1;
- int source = -1;
+ GF_UNUSED int ret1 = 0;
+ int i = 0;
+ int io_domain_lk_count = 0;
+ int shd_domain_lk_count = 0;
afr_private_t *priv = NULL;
- unsigned char *locked_on = NULL;
- unsigned char *data_lock = NULL;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- unsigned char *healed_sinks = NULL;
- struct afr_reply *locked_replies = NULL;
- gf_boolean_t granular_locks = _gf_false;
+ char *key1 = NULL;
+ char *key2 = NULL;
priv = this->private;
- granular_locks = priv->granular_locks; /*Assign to local variable so that
- reconfigure doesn't change this
- value between locking and unlocking
- below*/
- locked_on = alloca0(priv->child_count);
- data_lock = alloca0(priv->child_count);
- sources = alloca0(priv->child_count);
- sinks = alloca0(priv->child_count);
- healed_sinks = alloca0(priv->child_count);
+ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(this->name));
+ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(priv->sh_domain));
+ sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name);
+ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain);
- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
-
- if (!granular_locks) {
- ret = afr_selfheal_tryentrylk(frame, this, inode, priv->sh_domain, NULL,
- locked_on);
- }
- {
- if (!granular_locks && ret == 0) {
- ret = -afr_final_errno(frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN; /* all invalid responses */
- goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if ((replies[i].valid != 1) || (replies[i].op_ret != 0))
+ continue;
+ if (!io_domain_lk_count) {
+ ret1 = dict_get_int32(replies[i].xdata, key1, &io_domain_lk_count);
}
+ if (!shd_domain_lk_count) {
+ ret1 = dict_get_int32(replies[i].xdata, key2, &shd_domain_lk_count);
+ }
+ }
- ret = afr_selfheal_entrylk(frame, this, inode, this->name, NULL,
- data_lock);
- {
- if (ret == 0) {
- ret = -afr_final_errno(frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN;
- /* all invalid responses */
- goto unlock;
- }
- ret = __afr_selfheal_entry_prepare(frame, this, inode, data_lock,
- sources, sinks, healed_sinks,
- locked_replies, &source, pflag);
- if ((ret == 0) && (*pflag & PFLAG_SBRAIN))
- ret = -EIO;
- *esh = afr_decide_heal_info(priv, sources, ret);
+ if (!pending) {
+ if ((afr_is_dirty_count_non_unary(this, replies, ia_type)) ||
+ (!io_domain_lk_count)) {
+ /* Needs heal. */
+ ret = 0;
+ } else {
+ /* No heal needed. */
+ *dsh = *esh = *msh = 0;
+ }
+ } else {
+ if (shd_domain_lk_count) {
+ ret = -EAGAIN; /*For 'possibly-healing'. */
+ } else {
+ ret = 0; /* Needs heal. Set a non-negative value so that
+ it is treated as the source index. */
}
- afr_selfheal_unentrylk(frame, this, inode, this->name, NULL, data_lock,
- NULL);
}
-unlock:
- if (!granular_locks)
- afr_selfheal_unentrylk(frame, this, inode, priv->sh_domain, NULL,
- locked_on, NULL);
-out:
- if (locked_replies)
- afr_replies_wipe(locked_replies, priv->child_count);
return ret;
}
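/* A self-contained sketch of the "<prefix>:<domain>" lock-count key
 * construction performed in afr_update_heal_status() above. The real
 * prefix is the GLUSTERFS_INODELK_DOM_PREFIX macro from the glusterfs
 * headers; the value below is a hypothetical stand-in, and snprintf()
 * replaces the alloca0()/sprintf() pair so the sketch is portable. */
#include <stdio.h>

#define DOM_PREFIX "glusterfs.inodelk-dom-count" /* assumed stand-in */

int
main(void)
{
    const char *domain = "patchy-replicate-0"; /* e.g. this->name */
    char key[256];

    snprintf(key, sizeof(key), "%s:%s", DOM_PREFIX, domain);
    printf("%s\n", key); /* key looked up in each brick's xdata reply */
    return 0;
}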
+/* Returns -EIO (split-brain), -EAGAIN (possibly healing) or 0 (heal pending) */
int
-afr_selfheal_locked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
- inode_t **inode, gf_boolean_t *entry_selfheal,
- gf_boolean_t *data_selfheal,
- gf_boolean_t *metadata_selfheal,
- unsigned char *pending)
-
+afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ inode_t **inode, gf_boolean_t *entry_selfheal,
+ gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal, unsigned char *pending)
{
int ret = -1;
- fd_t *fd = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
gf_boolean_t dsh = _gf_false;
gf_boolean_t msh = _gf_false;
gf_boolean_t esh = _gf_false;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *valid_on = NULL;
+ uint64_t *witness = NULL;
+
+ priv = this->private;
+ replies = alloca0(sizeof(*replies) * priv->child_count);
+ sources = alloca0(sizeof(*sources) * priv->child_count);
+ sinks = alloca0(sizeof(*sinks) * priv->child_count);
+ witness = alloca0(sizeof(*witness) * priv->child_count);
+ valid_on = alloca0(sizeof(*valid_on) * priv->child_count);
ret = afr_selfheal_unlocked_inspect(frame, this, gfid, inode, &dsh, &msh,
- &esh);
+ &esh, replies);
if (ret)
goto out;
-
- /* For every heal type hold locks and check if it indeed needs heal */
-
- /* Heal-info does an open() on the file being examined so that the
- * current eager-lock holding client, if present, at some point sees
- * open-fd count being > 1 and releases the eager-lock so that heal-info
- * doesn't remain blocked forever until IO completes.
- */
- if ((*inode)->ia_type == IA_IFREG) {
- ret = afr_selfheal_data_open(this, *inode, &fd);
- if (ret < 0) {
- gf_msg_debug(this->name, -ret, "%s: Failed to open",
- uuid_utoa((*inode)->gfid));
- goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ valid_on[i] = 1;
}
}
-
if (msh) {
- ret = afr_selfheal_locked_metadata_inspect(frame, this, *inode, &msh,
- pending);
- if (ret == -EIO)
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_METADATA_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
goto out;
}
-
if (dsh) {
- ret = afr_selfheal_locked_data_inspect(frame, this, fd, &dsh, pending);
- if (ret == -EIO || (ret == -EAGAIN))
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_DATA_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
goto out;
}
-
if (esh) {
- ret = afr_selfheal_locked_entry_inspect(frame, this, *inode, &esh,
- pending);
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_ENTRY_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
+ goto out;
}
+ ret = afr_update_heal_status(this, replies, (*inode)->ia_type, &esh, &dsh,
+ &msh, *pending);
out:
*data_selfheal = dsh;
*entry_selfheal = esh;
*metadata_selfheal = msh;
- if (fd)
- fd_unref(fd);
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
return ret;
}
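/* A condensed sketch of the heal-status decision that
 * afr_update_heal_status() feeds back to afr_lockless_inspect(): with
 * no pending markers, a non-unary dirty count or a missing io-domain
 * lock means heal is needed; with pending markers, a held shd-domain
 * lock means "possibly healing". The 1 returned for "no heal" is a
 * sketch-only value standing in for clearing the dsh/esh/msh flags. */
#include <errno.h>
#include <stdio.h>

static int
heal_status(int pending, int dirty_non_unary, int io_locks, int shd_locks)
{
    if (!pending) {
        if (dirty_non_unary || !io_locks)
            return 0;          /* needs heal */
        return 1;              /* no heal needed (sketch-only) */
    }
    return shd_locks ? -EAGAIN /* possibly healing */
                     : 0;      /* needs heal */
}

int
main(void)
{
    printf("%d\n", heal_status(1, 0, 1, 1)); /* -11, i.e. -EAGAIN on Linux */
    return 0;
}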
-static dict_t *
-afr_set_heal_info(char *status)
-{
- dict_t *dict = NULL;
- int ret = -1;
-
- dict = dict_new();
- if (!dict) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = dict_set_dynstr_sizen(dict, "heal-info", status);
- if (ret)
- gf_msg("", GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
- "Failed to set heal-info key to "
- "%s",
- status);
-out:
- /* Any error other than EINVAL, dict_set_dynstr frees status */
- if (ret == -ENOMEM || ret == -EINVAL) {
- GF_FREE(status);
- }
-
- if (ret && dict) {
- dict_unref(dict);
- dict = NULL;
- }
- return dict;
-}
-
int
afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
@@ -6191,10 +6967,21 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
inode_t *inode = NULL;
char *substr = NULL;
char *status = NULL;
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *heal_local = NULL;
+
+ /* Use a frame with lk-owner set */
+ heal_frame = afr_frame_create(frame->this, &op_errno);
+ if (!heal_frame) {
+ ret = -1;
+ goto out;
+ }
+ heal_local = heal_frame->local;
+ heal_frame->local = frame->local;
- ret = afr_selfheal_locked_inspect(frame, this, loc->gfid, &inode,
- &entry_selfheal, &data_selfheal,
- &metadata_selfheal, &pending);
+ ret = afr_lockless_inspect(heal_frame, this, loc->gfid, &inode,
+ &entry_selfheal, &data_selfheal,
+ &metadata_selfheal, &pending);
if (ret == -ENOMEM) {
ret = -1;
@@ -6279,6 +7066,10 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
op_errno = 0;
out:
+ if (heal_frame) {
+ heal_frame->local = heal_local;
+ AFR_STACK_DESTROY(heal_frame);
+ }
AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
if (dict)
dict_unref(dict);
@@ -6475,6 +7266,8 @@ afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
int op_errno = 0;
dict_t *dict = NULL;
afr_local_t *local = NULL;
+ afr_local_t *heal_local = NULL;
+ call_frame_t *heal_frame = NULL;
local = frame->local;
dict = dict_new();
@@ -6484,7 +7277,16 @@ afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
goto out;
}
- ret = afr_selfheal_do(frame, this, loc->gfid);
+ heal_frame = afr_frame_create(this, &op_errno);
+ if (!heal_frame) {
+ ret = -1;
+ goto out;
+ }
+ heal_local = heal_frame->local;
+ heal_frame->local = frame->local;
+ /* Initiate the heal with a heal_frame that has its lk-owner set so
+ * that inodelk/entrylk work correctly */
+ ret = afr_selfheal_do(heal_frame, this, loc->gfid);
if (ret == 1 || ret == 2) {
ret = dict_set_sizen_str_sizen(dict, "sh-fail-msg",
@@ -6506,6 +7308,10 @@ afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
}
out:
+ if (heal_frame) {
+ heal_frame->local = heal_local;
+ AFR_STACK_DESTROY(heal_frame);
+ }
if (local->op == GF_FOP_GETXATTR)
AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
else if (local->op == GF_FOP_SETXATTR)
@@ -6648,7 +7454,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque)
ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0,
locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS)
+ if (ret < priv->child_count)
goto data_unlock;
ret = __afr_selfheal_data_prepare(
heal_frame, this, inode, locked_on, sources, sinks,
@@ -6665,7 +7471,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque)
ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name,
LLONG_MAX - 1, 0, locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS)
+ if (ret < priv->child_count)
goto mdata_unlock;
ret = __afr_selfheal_metadata_prepare(
heal_frame, this, inode, locked_on, sources, sinks,
@@ -6997,16 +7803,16 @@ afr_ta_has_quorum(afr_private_t *priv, afr_local_t *local)
return _gf_false;
}
-gf_boolean_t
+static gf_boolean_t
afr_is_add_replica_mount_lookup_on_root(call_frame_t *frame)
{
afr_local_t *local = NULL;
- local = frame->local;
-
if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT)
return _gf_false;
+ local = frame->local;
+
if (local->op != GF_FOP_LOOKUP)
/* TODO:If the replica count is being increased on a plain distribute
* volume that was never mounted, we need to allow setxattr on '/' with
@@ -7023,14 +7829,49 @@ afr_is_add_replica_mount_lookup_on_root(call_frame_t *frame)
}
gf_boolean_t
-afr_lookup_has_quorum(call_frame_t *frame, xlator_t *this,
- unsigned char *subvols)
+afr_lookup_has_quorum(call_frame_t *frame, const unsigned int up_children_count)
{
+ if (frame && (up_children_count > 0) &&
+ afr_is_add_replica_mount_lookup_on_root(frame))
+ return _gf_true;
+
+ return _gf_false;
+}
+
+void
+afr_handle_replies_quorum(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = frame->local;
afr_private_t *priv = this->private;
+ unsigned char *success_replies = NULL;
- if (frame && afr_is_add_replica_mount_lookup_on_root(frame)) {
- if (AFR_COUNT(subvols, priv->child_count) > 0)
- return _gf_true;
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+
+ if (priv->quorum_count && !afr_has_quorum(success_replies, this, NULL)) {
+ local->op_errno = afr_final_errno(local, priv);
+ if (!local->op_errno)
+ local->op_errno = afr_quorum_errno(priv);
+ local->op_ret = -1;
+ }
+}
+
+gf_boolean_t
+afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv, int child)
+{
+ int *pending = NULL;
+ int ret = 0;
+ int i = 0;
+
+ ret = dict_get_ptr(dict, priv->pending_key[child], (void *)&pending);
+ if (ret == 0) {
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ /* Not doing an ntoh32(pending) as we just want to check
+ * whether it is non-zero or not. */
+ if (pending[i]) {
+ return _gf_true;
+ }
+ }
}
return _gf_false;
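/* A self-contained sketch of the pending-changelog test that
 * afr_ta_dict_contains_pending_xattr() performs above: an AFR pending
 * xattr is an array of AFR_NUM_CHANGE_LOGS counters (data, metadata,
 * entry), and "pending" just means any of them is non-zero, so no
 * byte-order conversion is needed. The constant below mirrors AFR's
 * value for illustration only. */
#include <stdio.h>

#define NUM_CHANGE_LOGS 3 /* data, metadata, entry */

static int
has_pending(const int *pending)
{
    int i;

    for (i = 0; i < NUM_CHANGE_LOGS; i++)
        if (pending[i]) /* non-zero in any byte order is non-zero */
            return 1;
    return 0;
}

int
main(void)
{
    int clean[NUM_CHANGE_LOGS] = {0, 0, 0};
    int dirty[NUM_CHANGE_LOGS] = {0, 1, 0};

    printf("%d %d\n", has_pending(clean), has_pending(dirty));
    return 0;
}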
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 6307b637f8d..f8bf8340dab 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -10,7 +10,6 @@
#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
@@ -18,16 +17,10 @@
#include <glusterfs/glusterfs.h>
#include <glusterfs/dict.h>
-#include <glusterfs/xlator.h>
-#include <glusterfs/hashfn.h>
-#include <glusterfs/logging.h>
#include <glusterfs/list.h>
-#include <glusterfs/call-stub.h>
-#include <glusterfs/defaults.h>
#include <glusterfs/common-utils.h>
#include <glusterfs/compat-errno.h>
#include <glusterfs/compat.h>
-#include <glusterfs/checksum.h>
#include "afr.h"
#include "afr-transaction.h"
@@ -45,6 +38,10 @@ afr_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
fd_ctx = local->fd_ctx;
child_index = (long)cookie;
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
LOCK(&frame->lock);
{
if (op_ret == -1) {
@@ -56,19 +53,22 @@ afr_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (!local->xdata_rsp && xdata)
local->xdata_rsp = dict_ref(xdata);
}
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
- if (call_count == 0)
+ if (call_count == 0) {
+ afr_handle_replies_quorum(frame, this);
AFR_STACK_UNWIND(opendir, frame, local->op_ret, local->op_errno,
local->fd, NULL);
+ }
+
return 0;
}
int
-afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -84,6 +84,12 @@ afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
goto out;
local->op = GF_FOP_OPENDIR;
+
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ op_errno = afr_quorum_errno(priv);
+ goto out;
+ }
+
if (!afr_is_consistent_io_possible(local, priv, &op_errno))
goto out;
@@ -158,8 +164,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol)
}
static void
-afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
- gf_dirent_t *entries, fd_t *fd)
+afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
+ int subvol, gf_dirent_t *entries, fd_t *fd)
{
int ret = -1;
gf_dirent_t *entry = NULL;
@@ -177,8 +183,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list)
{
- if (__is_root_gfid(fd->inode->gfid) &&
- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name,
+ frame->root->pid)) {
continue;
}
@@ -222,8 +228,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
if (op_ret >= 0)
- afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries,
- local->fd);
+ afr_readdir_transform_entries(frame, subvol_entries, (long)cookie,
+ &entries, local->fd);
AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata);
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 84e2a344624..b7cceb79158 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -10,7 +10,6 @@
#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
@@ -18,11 +17,8 @@
#include <glusterfs/glusterfs.h>
#include "afr.h"
#include <glusterfs/dict.h>
-#include <glusterfs/xlator.h>
-#include <glusterfs/hashfn.h>
#include <glusterfs/logging.h>
#include <glusterfs/list.h>
-#include <glusterfs/call-stub.h>
#include <glusterfs/defaults.h>
#include <glusterfs/common-utils.h>
#include <glusterfs/compat-errno.h>
@@ -123,11 +119,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
continue;
if (local->replies[i].op_ret < 0) {
if (local->inode)
- afr_inode_event_gen_reset(local->inode, this);
+ afr_inode_need_refresh_set(local->inode, this);
if (local->parent)
- afr_inode_event_gen_reset(local->parent, this);
+ afr_inode_need_refresh_set(local->parent, this);
if (local->parent2)
- afr_inode_event_gen_reset(local->parent2, this);
+ afr_inode_need_refresh_set(local->parent2, this);
continue;
}
@@ -233,9 +229,9 @@ __afr_dir_write_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
__afr_dir_write_fill(frame, this, child_index, op_ret, op_errno, buf,
preparent, postparent, preparent2, postparent2,
xdata);
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
if (call_count == 0) {
__afr_dir_write_finalize(frame, this);
@@ -349,6 +345,7 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this)
afr_private_t *priv = NULL;
int pre_op_count = 0;
int failed_count = 0;
+ unsigned char *success_replies = NULL;
local = frame->local;
priv = this->private;
@@ -364,9 +361,16 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this)
failed_count = AFR_COUNT(local->transaction.failed_subvols,
priv->child_count);
+ /* FOP succeeded on all bricks. */
if (pre_op_count == priv->child_count && !failed_count)
return;
+ /* FOP did not succeed on a quorum of bricks. */
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+ if (!afr_has_quorum(success_replies, this, NULL))
+ return;
+
if (priv->thin_arbiter_count) {
/*Mark new entry using ta file*/
local->is_new_entry = _gf_true;
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 523a5b48880..c5521704de2 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -18,11 +18,8 @@
#include <glusterfs/glusterfs.h>
#include "afr.h"
#include <glusterfs/dict.h>
-#include <glusterfs/xlator.h>
-#include <glusterfs/hashfn.h>
#include <glusterfs/logging.h>
#include <glusterfs/list.h>
-#include <glusterfs/call-stub.h>
#include <glusterfs/byte-order.h>
#include <glusterfs/defaults.h>
#include <glusterfs/common-utils.h>
@@ -305,6 +302,7 @@ afr_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
afr_local_t *local = NULL;
int op_errno = 0;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
@@ -948,24 +946,13 @@ unlock:
goto unwind;
}
- len = dict_serialized_length(local->dict);
- if (len <= 0) {
- goto unwind;
- }
-
- lockinfo_buf = GF_CALLOC(1, len, gf_common_mt_char);
- if (!lockinfo_buf) {
+ op_ret = dict_allocate_and_serialize(
+ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len);
+ if (op_ret != 0) {
local->op_ret = -1;
- local->op_errno = ENOMEM;
goto unwind;
}
- op_ret = dict_serialize(local->dict, lockinfo_buf);
- if (op_ret < 0) {
- local->op_ret = -1;
- local->op_errno = -op_ret;
- }
-
op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
(void *)lockinfo_buf, len);
if (op_ret < 0) {
@@ -1064,24 +1051,13 @@ unlock:
goto unwind;
}
- len = dict_serialized_length(local->dict);
- if (len <= 0) {
- goto unwind;
- }
-
- lockinfo_buf = GF_CALLOC(1, len, gf_common_mt_char);
- if (!lockinfo_buf) {
+ op_ret = dict_allocate_and_serialize(
+ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len);
+ if (op_ret != 0) {
local->op_ret = -1;
- local->op_errno = ENOMEM;
goto unwind;
}
- op_ret = dict_serialize(local->dict, lockinfo_buf);
- if (op_ret < 0) {
- local->op_ret = -1;
- local->op_errno = -op_ret;
- }
-
op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
(void *)lockinfo_buf, len);
if (op_ret < 0) {
@@ -1723,6 +1699,7 @@ afr_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
int32_t op_errno = 0;
fop_fgetxattr_cbk_t cbk = NULL;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
@@ -1816,6 +1793,7 @@ afr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
afr_local_t *local = NULL;
int32_t op_errno = 0;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
@@ -1891,6 +1869,7 @@ afr_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
afr_local_t *local = NULL;
int32_t op_errno = 0;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
diff --git a/xlators/cluster/afr/src/afr-inode-read.h b/xlators/cluster/afr/src/afr-inode-read.h
index 1627ee2c426..8c982bc7e6f 100644
--- a/xlators/cluster/afr/src/afr-inode-read.h
+++ b/xlators/cluster/afr/src/afr-inode-read.h
@@ -38,5 +38,8 @@ afr_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
dict_t *xdata);
int
+afr_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata);
+int
afr_handle_quota_size(call_frame_t *frame, xlator_t *this);
#endif /* __INODE_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 7fcc9d48ada..1d6e4f3570a 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -8,9 +8,7 @@
cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
@@ -18,11 +16,7 @@
#include <glusterfs/glusterfs.h>
#include "afr.h"
#include <glusterfs/dict.h>
-#include <glusterfs/xlator.h>
-#include <glusterfs/hashfn.h>
#include <glusterfs/logging.h>
-#include <glusterfs/list.h>
-#include <glusterfs/call-stub.h>
#include <glusterfs/defaults.h>
#include <glusterfs/common-utils.h>
#include <glusterfs/compat-errno.h>
@@ -180,11 +174,10 @@ __afr_inode_write_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
{
__afr_inode_write_fill(frame, this, child_index, op_ret, op_errno,
prebuf, postbuf, xattr, xdata);
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
if (call_count == 0) {
__afr_inode_write_finalize(frame, this);
@@ -498,6 +491,7 @@ afr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
int op_errno = ENOMEM;
int ret = -1;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
@@ -737,6 +731,7 @@ afr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -947,6 +942,7 @@ afr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -1063,11 +1059,10 @@ afr_emptyb_set_pending_changelog_cbk(call_frame_t *frame, void *cookie,
if (ret)
goto out;
- gf_msg(this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO,
- op_ret ? op_errno : 0, afr_get_msg_id(op_type),
- "Set of pending xattr %s on"
- " %s.",
- op_ret ? "failed" : "succeeded", priv->children[i]->name);
+ gf_smsg(this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO,
+ op_ret ? op_errno : 0, AFR_MSG_SET_PEND_XATTR, "name=%s",
+ priv->children[i]->name, "op_ret=%s",
+ op_ret ? "failed" : "succeeded", NULL);
out:
syncbarrier_wake(&local->barrier);
@@ -1161,9 +1156,8 @@ _afr_handle_empty_brick_type(xlator_t *this, call_frame_t *frame, loc_t *loc,
}
if (!count) {
- gf_msg(this->name, GF_LOG_ERROR, EAGAIN, AFR_MSG_REPLACE_BRICK_STATUS,
- "Couldn't acquire lock on"
- " any child.");
+ gf_smsg(this->name, GF_LOG_ERROR, EAGAIN, AFR_MSG_REPLACE_BRICK_STATUS,
+ NULL);
ret = -EAGAIN;
goto unlock;
}
@@ -1214,6 +1208,7 @@ _afr_handle_empty_brick(void *opaque)
char *op_type = NULL;
int op_type_len = 0;
afr_empty_brick_args_t *data = NULL;
+ call_frame_t *op_frame = NULL;
data = opaque;
frame = data->frame;
@@ -1221,21 +1216,29 @@ _afr_handle_empty_brick(void *opaque)
if (!data->op_type)
goto out;
+ op_frame = copy_frame(frame);
+ if (!op_frame) {
+ ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
op_type = data->op_type;
op_type_len = strlen(op_type);
- this = frame->this;
+ this = op_frame->this;
priv = this->private;
- local = AFR_FRAME_INIT(frame, op_errno);
+ afr_set_lk_owner(op_frame, this, op_frame->root);
+ local = AFR_FRAME_INIT(op_frame, op_errno);
if (!local)
goto out;
loc_copy(&local->loc, &data->loc);
- gf_msg(this->name, GF_LOG_INFO, 0, 0, "New brick is : %s",
- priv->children[empty_index]->name);
+ gf_smsg(this->name, GF_LOG_INFO, 0, AFR_MSG_NEW_BRICK, "name=%s",
+ priv->children[empty_index]->name, NULL);
- ret = _afr_handle_empty_brick_type(this, frame, &local->loc, empty_index,
+ ret = _afr_handle_empty_brick_type(this, op_frame, &local->loc, empty_index,
AFR_METADATA_TRANSACTION, op_type,
op_type_len);
if (ret) {
@@ -1251,7 +1254,7 @@ _afr_handle_empty_brick(void *opaque)
local->xattr_req = NULL;
local->xdata_req = NULL;
- ret = _afr_handle_empty_brick_type(this, frame, &local->loc, empty_index,
+ ret = _afr_handle_empty_brick_type(this, op_frame, &local->loc, empty_index,
AFR_ENTRY_TRANSACTION, op_type,
op_type_len);
if (ret) {
@@ -1261,6 +1264,9 @@ _afr_handle_empty_brick(void *opaque)
}
ret = 0;
out:
+ if (op_frame) {
+ AFR_STACK_DESTROY(op_frame);
+ }
AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
return 0;
}
@@ -1305,9 +1311,8 @@ afr_split_brain_resolve_do(call_frame_t *frame, xlator_t *this, loc_t *loc,
*/
ret = afr_inode_split_brain_choice_set(loc->inode, this, -1);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
- "Failed to set"
- "split-brain choice to -1");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN_SET_FAILED,
+ NULL);
afr_heal_splitbrain_file(frame, this, loc);
ret = 0;
out:
@@ -1330,8 +1335,8 @@ afr_get_split_brain_child_index(xlator_t *this, void *value, size_t len)
spb_child_index = afr_get_child_index_from_name(this, spb_child_str);
if (spb_child_index < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
- "Invalid subvol: %s", spb_child_str);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "subvol=%s", spb_child_str, NULL);
}
return spb_child_index;
}
@@ -1353,11 +1358,9 @@ afr_can_set_split_brain_choice(void *opaque)
&data->m_spb);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
- "Failed to determine if %s"
- " is in split-brain. "
- "Aborting split-brain-choice set.",
- uuid_utoa(loc->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED, "gfid=%s",
+ uuid_utoa(loc->gfid), NULL);
return ret;
}
@@ -1365,7 +1368,8 @@ int
afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
dict_t *dict)
{
- void *value = NULL;
+ void *choice_value = NULL;
+ void *resolve_value = NULL;
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_spbc_timeout_t *data = NULL;
@@ -1376,6 +1380,14 @@ afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
priv = this->private;
+ ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_CHOICE, &choice_value, &len);
+ ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_RESOLVE, &resolve_value,
+ &len);
+ if (!choice_value && !resolve_value) {
+ ret = -1;
+ goto out;
+ }
+
local = AFR_FRAME_INIT(frame, op_errno);
if (!local) {
ret = 1;
@@ -1384,9 +1396,9 @@ afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
local->op = GF_FOP_SETXATTR;
- ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_CHOICE, &value, &len);
- if (value) {
- spb_child_index = afr_get_split_brain_child_index(this, value, len);
+ if (choice_value) {
+ spb_child_index = afr_get_split_brain_child_index(this, choice_value,
+ len);
if (spb_child_index < 0) {
/* Case where value was "none" */
if (spb_child_index == -2)
@@ -1410,12 +1422,8 @@ afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
ret = synctask_new(this->ctx->env, afr_can_set_split_brain_choice,
afr_set_split_brain_choice, NULL, data);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
- "Failed to create"
- " synctask. Aborting split-brain choice set"
- " for %s",
- loc->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS,
+ "name=%s", loc->name, NULL);
ret = 1;
op_errno = ENOMEM;
goto out;
@@ -1424,9 +1432,9 @@ afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
goto out;
}
- ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_RESOLVE, &value, &len);
- if (value) {
- spb_child_index = afr_get_split_brain_child_index(this, value, len);
+ if (resolve_value) {
+ spb_child_index = afr_get_split_brain_child_index(this, resolve_value,
+ len);
if (spb_child_index < 0) {
ret = 1;
goto out;
@@ -1490,8 +1498,8 @@ afr_handle_empty_brick(xlator_t *this, call_frame_t *frame, loc_t *loc,
goto out;
if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT) {
- gf_msg(this->name, GF_LOG_ERROR, EPERM, afr_get_msg_id(op_type),
- "'%s' is an internal extended attribute.", op_type);
+ gf_smsg(this->name, GF_LOG_ERROR, EPERM, AFR_MSG_INTERNAL_ATTR,
+ "op_type=%s", op_type, NULL);
ret = 1;
goto out;
}
@@ -1517,8 +1525,8 @@ afr_handle_empty_brick(xlator_t *this, call_frame_t *frame, loc_t *loc,
ret = synctask_new(this->ctx->env, _afr_handle_empty_brick,
_afr_handle_empty_brick_cbk, NULL, data);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, afr_get_msg_id(op_type),
- "Failed to create synctask.");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS,
+ NULL);
ret = 1;
op_errno = ENOMEM;
afr_brick_args_cleanup(data);
@@ -1676,6 +1684,7 @@ afr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.afr.*", dict, op_errno, out);
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -1884,6 +1893,7 @@ afr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.afr.*", name, op_errno, out);
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -1984,6 +1994,7 @@ afr_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -2093,6 +2104,7 @@ afr_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -2199,6 +2211,7 @@ afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -2398,6 +2411,7 @@ afr_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -2492,7 +2506,9 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
call_frame_t *transaction_frame = NULL;
int ret = -1;
int32_t op_errno = ENOMEM;
+ int8_t last_fsync = 0;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -2501,10 +2517,16 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
if (!local)
goto out;
- if (xdata)
+ if (xdata) {
local->xdata_req = dict_copy_with_ref(xdata, NULL);
- else
+ if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) {
+ if (last_fsync) {
+ local->transaction.disable_delayed_post_op = _gf_true;
+ }
+ }
+ } else {
local->xdata_req = dict_new();
+ }
if (!local->xdata_req)
goto out;
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index b0fb00641a0..816065fb57a 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -31,6 +31,8 @@ enum gf_afr_mem_types_ {
gf_afr_mt_empty_brick_t,
gf_afr_mt_child_latency_t,
gf_afr_mt_atomic_t,
+ gf_afr_mt_lk_heal_info_t,
+ gf_afr_mt_gf_lock,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
index c9c99270e98..e73fd997765 100644
--- a/xlators/cluster/afr/src/afr-messages.h
+++ b/xlators/cluster/afr/src/afr-messages.h
@@ -23,25 +23,145 @@
* glfs-message-id.h.
*/
-GLFS_MSGID(AFR, AFR_MSG_QUORUM_FAIL, AFR_MSG_QUORUM_MET,
- AFR_MSG_QUORUM_OVERRIDE, AFR_MSG_INVALID_CHILD_UP, AFR_MSG_SUBVOL_UP,
- AFR_MSG_SUBVOLS_DOWN, AFR_MSG_ENTRY_UNLOCK_FAIL, AFR_MSG_SPLIT_BRAIN,
- AFR_MSG_OPEN_FAIL, AFR_MSG_UNLOCK_FAIL, AFR_MSG_REPLACE_BRICK_STATUS,
- AFR_MSG_GFID_NULL, AFR_MSG_FD_CREATE_FAILED, AFR_MSG_DICT_SET_FAILED,
- AFR_MSG_EXPUNGING_FILE_OR_DIR, AFR_MSG_MIGRATION_IN_PROGRESS,
- AFR_MSG_CHILD_MISCONFIGURED, AFR_MSG_VOL_MISCONFIGURED,
- AFR_MSG_INTERNAL_LKS_FAILED, AFR_MSG_INVALID_FD, AFR_MSG_LOCK_INFO,
- AFR_MSG_LOCK_XLATOR_NOT_LOADED, AFR_MSG_FD_CTX_GET_FAILED,
- AFR_MSG_INVALID_SUBVOL, AFR_MSG_PUMP_XLATOR_ERROR,
- AFR_MSG_SELF_HEAL_INFO, AFR_MSG_READ_SUBVOL_ERROR,
- AFR_MSG_DICT_GET_FAILED, AFR_MSG_INFO_COMMON,
- AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, AFR_MSG_LOCAL_CHILD,
- AFR_MSG_INVALID_DATA, AFR_MSG_INVALID_ARG,
- AFR_MSG_INDEX_DIR_GET_FAILED, AFR_MSG_FSYNC_FAILED,
- AFR_MSG_FAVORITE_CHILD, AFR_MSG_SELF_HEAL_FAILED,
- AFR_MSG_SPLIT_BRAIN_STATUS, AFR_MSG_ADD_BRICK_STATUS,
- AFR_MSG_NO_CHANGELOG, AFR_MSG_TIMER_CREATE_FAIL,
- AFR_MSG_SBRAIN_FAV_CHILD_POLICY, AFR_MSG_INODE_CTX_GET_FAILED,
- AFR_MSG_THIN_ARB);
+GLFS_MSGID(
+ AFR, AFR_MSG_QUORUM_FAIL, AFR_MSG_QUORUM_MET, AFR_MSG_QUORUM_OVERRIDE,
+ AFR_MSG_INVALID_CHILD_UP, AFR_MSG_SUBVOL_UP, AFR_MSG_SUBVOLS_DOWN,
+ AFR_MSG_ENTRY_UNLOCK_FAIL, AFR_MSG_SPLIT_BRAIN, AFR_MSG_OPEN_FAIL,
+ AFR_MSG_UNLOCK_FAIL, AFR_MSG_REPLACE_BRICK_STATUS, AFR_MSG_GFID_NULL,
+ AFR_MSG_FD_CREATE_FAILED, AFR_MSG_DICT_SET_FAILED,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR, AFR_MSG_MIGRATION_IN_PROGRESS,
+ AFR_MSG_CHILD_MISCONFIGURED, AFR_MSG_VOL_MISCONFIGURED,
+ AFR_MSG_INTERNAL_LKS_FAILED, AFR_MSG_INVALID_FD, AFR_MSG_LOCK_INFO,
+ AFR_MSG_LOCK_XLATOR_NOT_LOADED, AFR_MSG_FD_CTX_GET_FAILED,
+ AFR_MSG_INVALID_SUBVOL, AFR_MSG_PUMP_XLATOR_ERROR, AFR_MSG_SELF_HEAL_INFO,
+ AFR_MSG_READ_SUBVOL_ERROR, AFR_MSG_DICT_GET_FAILED, AFR_MSG_INFO_COMMON,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, AFR_MSG_LOCAL_CHILD, AFR_MSG_INVALID_DATA,
+ AFR_MSG_INVALID_ARG, AFR_MSG_INDEX_DIR_GET_FAILED, AFR_MSG_FSYNC_FAILED,
+ AFR_MSG_FAVORITE_CHILD, AFR_MSG_SELF_HEAL_FAILED,
+ AFR_MSG_SPLIT_BRAIN_STATUS, AFR_MSG_ADD_BRICK_STATUS, AFR_MSG_NO_CHANGELOG,
+ AFR_MSG_TIMER_CREATE_FAIL, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ AFR_MSG_INODE_CTX_GET_FAILED, AFR_MSG_THIN_ARB,
+ AFR_MSG_THIN_ARB_XATTROP_FAILED, AFR_MSG_THIN_ARB_LOC_POP_FAILED,
+ AFR_MSG_GET_PEND_VAL, AFR_MSG_THIN_ARB_SKIP_SHD, AFR_MSG_UNKNOWN_SET,
+ AFR_MSG_NO_XL_ID, AFR_MSG_SELF_HEAL_INFO_START,
+ AFR_MSG_SELF_HEAL_INFO_FINISH, AFR_MSG_INCRE_COUNT,
+ AFR_MSG_ADD_TO_OUTPUT_FAILED, AFR_MSG_SET_TIME_FAILED,
+ AFR_MSG_GFID_MISMATCH_DETECTED, AFR_MSG_GFID_HEAL_MSG,
+ AFR_MSG_THIN_ARB_LOOKUP_FAILED, AFR_MSG_DICT_CREATE_FAILED,
+ AFR_MSG_NO_MAJORITY_TO_RESOLVE, AFR_MSG_TYPE_MISMATCH,
+ AFR_MSG_SIZE_POLICY_NOT_APPLICABLE, AFR_MSG_NO_CHILD_SELECTED,
+ AFR_MSG_INVALID_CHILD, AFR_MSG_RESOLVE_CONFLICTING_DATA,
+ SERROR_GETTING_SRC_BRICK, SNO_DIFF_IN_MTIME, SNO_BIGGER_FILE,
+ SALL_BRICKS_UP_TO_RESOLVE, AFR_MSG_UNLOCK_FAILED, AFR_MSG_POST_OP_FAILED,
+ AFR_MSG_TA_FRAME_CREATE_FAILED, AFR_MSG_SET_KEY_XATTROP_FAILED,
+ AFR_MSG_BLOCKING_ENTRYLKS_FAILED, AFR_MSG_FOP_FAILED,
+ AFR_MSG_CLEAN_UP_FAILED, AFR_MSG_UNABLE_TO_FETCH, AFR_MSG_XATTR_SET_FAILED,
+ AFR_MSG_SPLIT_BRAIN_REPLICA, AFR_MSG_INODE_CTX_FAILED,
+ AFR_MSG_LOOKUP_FAILED, AFR_MSG_ALL_SUBVOLS_DOWN,
+ AFR_MSG_RELEASE_LOCK_FAILED, AFR_MSG_CLEAR_TIME_SPLIT_BRAIN,
+ AFR_MSG_READ_FAILED, AFR_MSG_LAUNCH_FAILED, AFR_MSG_READ_SUBVOL_NOT_UP,
+ AFR_MSG_LK_HEAL_DOM, AFR_MSG_NEW_BRICK, AFR_MSG_SPLIT_BRAIN_SET_FAILED,
+ AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED, AFR_MSG_HEALER_SPAWN_FAILED,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, AFR_MSG_NULL_DEREF, AFR_MSG_SET_PEND_XATTR,
+ AFR_MSG_INTERNAL_ATTR);
+#define AFR_MSG_DICT_GET_FAILED_STR "Dict get failed"
+#define AFR_MSG_DICT_SET_FAILED_STR "Dict set failed"
+#define AFR_MSG_HEALER_SPAWN_FAILED_STR "Healer spawn failed"
+#define AFR_MSG_ADD_CRAWL_EVENT_FAILED_STR "Adding crawl event failed"
+#define AFR_MSG_INVALID_ARG_STR "Invalid argument"
+#define AFR_MSG_INDEX_DIR_GET_FAILED_STR "unable to get index-dir on "
+#define AFR_MSG_THIN_ARB_LOOKUP_FAILED_STR "Failed lookup on file"
+#define AFR_MSG_DICT_CREATE_FAILED_STR "Failed to create dict."
+#define AFR_MSG_THIN_ARB_XATTROP_FAILED_STR "Xattrop failed."
+#define AFR_MSG_THIN_ARB_LOC_POP_FAILED_STR \
+ "Failed to populate loc for thin-arbiter"
+#define AFR_MSG_GET_PEND_VAL_STR "Error getting value of pending"
+#define AFR_MSG_THIN_ARB_SKIP_SHD_STR "I am not the god shd. skipping."
+#define AFR_MSG_UNKNOWN_SET_STR "Unknown set"
+#define AFR_MSG_NO_XL_ID_STR "xl does not have id"
+#define AFR_MSG_SELF_HEAL_INFO_START_STR "starting full sweep on"
+#define AFR_MSG_SELF_HEAL_INFO_FINISH_STR "finished full sweep on"
+#define AFR_MSG_INCRE_COUNT_STR "Could not increment the counter."
+#define AFR_MSG_ADD_TO_OUTPUT_FAILED_STR "Could not add to output"
+#define AFR_MSG_SET_TIME_FAILED_STR "Could not set time"
+#define AFR_MSG_GFID_HEAL_MSG_STR "Error setting gfid-heal-msg dict"
+#define AFR_MSG_NO_MAJORITY_TO_RESOLVE_STR \
+ "No majority to resolve gfid split brain"
+#define AFR_MSG_GFID_MISMATCH_DETECTED_STR "Gfid mismatch detected"
+#define AFR_MSG_SELF_HEAL_INFO_STR "performing selfheal"
+#define AFR_MSG_TYPE_MISMATCH_STR "TYPE mismatch"
+#define AFR_MSG_SIZE_POLICY_NOT_APPLICABLE_STR \
+ "Size policy is not applicable to directories."
+#define AFR_MSG_NO_CHILD_SELECTED_STR \
+ "No child selected by favorite-child policy"
+#define AFR_MSG_INVALID_CHILD_STR "Invalid child"
+#define AFR_MSG_RESOLVE_CONFLICTING_DATA_STR \
+ "selected as authentic to resolve conflicting data"
+#define SERROR_GETTING_SRC_BRICK_STR "Error getting the source brick"
+#define SNO_DIFF_IN_MTIME_STR "No difference in mtime"
+#define SNO_BIGGER_FILE_STR "No bigger file"
+#define SALL_BRICKS_UP_TO_RESOLVE_STR \
+ "All the bricks should be up to resolve the gfid split brain"
+#define AFR_MSG_UNLOCK_FAILED_STR "Failed to unlock"
+#define AFR_MSG_POST_OP_FAILED_STR "Post-op on thin-arbiter failed"
+#define AFR_MSG_TA_FRAME_CREATE_FAILED_STR "Failed to create ta_frame"
+#define AFR_MSG_SET_KEY_XATTROP_FAILED_STR "Could not set key during xattrop"
+#define AFR_MSG_BLOCKING_ENTRYLKS_FAILED_STR "Blocking entrylks failed"
+#define AFR_MSG_FSYNC_FAILED_STR "fsync failed"
+#define AFR_MSG_QUORUM_FAIL_STR "quorum is not met"
+#define AFR_MSG_FOP_FAILED_STR "Failing Fop"
+#define AFR_MSG_INVALID_SUBVOL_STR "not a subvolume"
+#define AFR_MSG_VOL_MISCONFIGURED_STR "Volume is dangling"
+#define AFR_MSG_CHILD_MISCONFIGURED_STR \
+ "replicate translator needs more than one subvolume defined"
+#define AFR_MSG_CLEAN_UP_FAILED_STR "Failed to clean up healer threads"
+#define AFR_MSG_QUORUM_OVERRIDE_STR "overriding quorum-count"
+#define AFR_MSG_UNABLE_TO_FETCH_STR \
+ "Unable to fetch afr-pending-xattr option from volfile. Falling back to " \
+ "using client translator names"
+#define AFR_MSG_NULL_DEREF_STR "possible NULL deref"
+#define AFR_MSG_XATTR_SET_FAILED_STR "Cannot set xattr cookie key"
+#define AFR_MSG_SPLIT_BRAIN_STATUS_STR "Failed to create synctask"
+#define AFR_MSG_SUBVOLS_DOWN_STR "All subvolumes are not up"
+#define AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR_STR \
+ "Failed to cancel split-brain choice"
+#define AFR_MSG_SPLIT_BRAIN_REPLICA_STR \
+ "Cannot set replica. File is not in data/metadata split-brain"
+#define AFR_MSG_INODE_CTX_FAILED_STR "Failed to get inode_ctx"
+#define AFR_MSG_READ_SUBVOL_ERROR_STR "no read subvols"
+#define AFR_MSG_LOCAL_CHILD_STR "selecting local read-child"
+#define AFR_MSG_LOOKUP_FAILED_STR "Failed to lookup/create thin-arbiter id file"
+#define AFR_MSG_TIMER_CREATE_FAIL_STR \
+ "Cannot create timer for delayed initialization"
+#define AFR_MSG_SUBVOL_UP_STR "Subvolume came back up; going online"
+#define AFR_MSG_ALL_SUBVOLS_DOWN_STR \
+ "All subvolumes are down. Going offline until atleast one of them is up"
+#define AFR_MSG_RELEASE_LOCK_FAILED_STR "Failed to release lock"
+#define AFR_MSG_INVALID_CHILD_UP_STR "Received child_up from invalid subvolume"
+#define AFR_MSG_QUORUM_MET_STR "Client-quorum is met"
+#define AFR_MSG_EXPUNGING_FILE_OR_DIR_STR "expunging file or dir"
+#define AFR_MSG_SELF_HEAL_FAILED_STR "Invalid"
+#define AFR_MSG_SPLIT_BRAIN_STR "Skipping conservative merge on the file"
+#define AFR_MSG_CLEAR_TIME_SPLIT_BRAIN_STR "clear time split brain"
+#define AFR_MSG_READ_FAILED_STR "Failing read since good brick is down"
+#define AFR_MSG_LAUNCH_FAILED_STR "Failed to launch synctask"
+#define AFR_MSG_READ_SUBVOL_NOT_UP_STR \
+ "read subvolume in this generation is not up"
+#define AFR_MSG_INTERNAL_LKS_FAILED_STR \
+ "Unable to work with lk-owner while attempting fop"
+#define AFR_MSG_LOCK_XLATOR_NOT_LOADED_STR \
+ "subvolume does not support locking. please load features/locks xlator " \
+ "on server."
+#define AFR_MSG_FD_CTX_GET_FAILED_STR "unable to get fd ctx"
+#define AFR_MSG_INFO_COMMON_STR "fd not open on any subvolumes, aborting."
+#define AFR_MSG_REPLACE_BRICK_STATUS_STR "Couldn't acquire lock on any child."
+#define AFR_MSG_NEW_BRICK_STR "New brick"
+#define AFR_MSG_SPLIT_BRAIN_SET_FAILED_STR \
+ "Failed to set split-brain choice to -1"
+#define AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED_STR \
+ "Failed to determine split-brain. Aborting split-brain-choice set"
+#define AFR_MSG_OPEN_FAIL_STR "Failed to open subvolume"
+#define AFR_MSG_SET_PEND_XATTR_STR "Set of pending xattr"
+#define AFR_MSG_INTERNAL_ATTR_STR "is an internal extended attribute"
#endif /* !_AFR_MESSAGES_H_ */
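
Each message ID now has a matching *_STR macro carrying the human-readable text, which lets call sites switch from free-form gf_msg() to structured gf_smsg() with NULL-terminated "key=value" arguments. The call shape, as it appears in the afr-open.c hunk below:

    gf_smsg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_OPEN_FAIL,
            "path=%s", local->loc.path,
            "subvolume=%s", priv->children[child_index]->name, NULL);
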
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index ff72c73a9f4..64856042b65 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -8,9 +8,7 @@
cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
@@ -18,11 +16,7 @@
#include <glusterfs/glusterfs.h>
#include "afr.h"
#include <glusterfs/dict.h>
-#include <glusterfs/xlator.h>
-#include <glusterfs/hashfn.h>
#include <glusterfs/logging.h>
-#include <glusterfs/list.h>
-#include <glusterfs/call-stub.h>
#include <glusterfs/defaults.h>
#include <glusterfs/common-utils.h>
#include <glusterfs/compat-errno.h>
@@ -30,10 +24,6 @@
#include <glusterfs/byte-order.h>
#include <glusterfs/statedump.h>
-#include "afr-inode-read.h"
-#include "afr-inode-write.h"
-#include "afr-dir-read.h"
-#include "afr-dir-write.h"
#include "afr-transaction.h"
gf_boolean_t
@@ -73,6 +63,10 @@ afr_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
local = frame->local;
fd_ctx = local->fd_ctx;
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
LOCK(&frame->lock);
{
if (op_ret == -1) {
@@ -84,13 +78,16 @@ afr_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
if (!local->xdata_rsp && xdata)
local->xdata_rsp = dict_ref(xdata);
}
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
if (call_count == 0) {
- if ((fd_ctx->flags & O_TRUNC) && (local->op_ret >= 0)) {
+ afr_handle_replies_quorum(frame, this);
+ if (local->op_ret == -1) {
+ AFR_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, NULL,
+ NULL);
+ } else if (fd_ctx->flags & O_TRUNC) {
STACK_WIND(frame, afr_open_ftruncate_cbk, this,
this->fops->ftruncate, fd, 0, NULL);
} else {
@@ -140,7 +137,7 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int spb_choice = 0;
+ int spb_subvol = 0;
int event_generation = 0;
int ret = 0;
int32_t op_errno = 0;
@@ -161,6 +158,11 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto out;
}
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ op_errno = afr_quorum_errno(priv);
+ goto out;
+ }
+
if (!afr_is_consistent_io_possible(local, priv, &op_errno))
goto out;
@@ -177,9 +179,9 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
ret = afr_inode_get_readable(frame, local->inode, this, NULL,
&event_generation, AFR_DATA_TRANSACTION);
if ((ret < 0) &&
- (afr_inode_split_brain_choice_get(local->inode, this, &spb_choice) ==
- 0) &&
- spb_choice < 0) {
+ (afr_split_brain_read_subvol_get(local->inode, this, NULL,
+ &spb_subvol) == 0) &&
+ spb_subvol < 0) {
afr_inode_refresh(frame, this, local->inode, local->inode->gfid,
afr_open_continue);
} else {
@@ -213,11 +215,9 @@ afr_openfd_fix_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
"successfully on subvolume %s",
local->loc.path, priv->children[child_index]->name);
} else {
- gf_msg(this->name, fop_log_level(GF_FOP_OPEN, op_errno), op_errno,
- AFR_MSG_OPEN_FAIL,
- "Failed to open %s on "
- "subvolume %s",
- local->loc.path, priv->children[child_index]->name);
+ gf_smsg(this->name, fop_log_level(GF_FOP_OPEN, op_errno), op_errno,
+ AFR_MSG_OPEN_FAIL, "path=%s", local->loc.path, "subvolume=%s",
+ priv->children[child_index]->name, NULL);
}
fd_ctx = local->fd_ctx;
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index 7e258049005..6fc2c75145c 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -30,27 +30,6 @@ afr_pending_read_decrement(afr_private_t *priv, int child_index)
GF_ATOMIC_DEC(priv->pending_reads[child_index]);
}
-static gf_boolean_t
-afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv, int child)
-{
- int *pending = NULL;
- int ret = 0;
- int i = 0;
-
- ret = dict_get_ptr(dict, priv->pending_key[child], (void *)&pending);
- if (ret == 0) {
- for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
- /* Not doing a ntoh32(pending) as we just want to check
- * if it is non-zero or not. */
- if (pending[i]) {
- return _gf_true;
- }
- }
- }
-
- return _gf_false;
-}
-
void
afr_read_txn_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
@@ -185,7 +164,7 @@ afr_ta_read_txn(void *opaque)
xdata_rsp = NULL;
/* It doesn't. So query thin-arbiter to see if it blames any data brick. */
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate thin-arbiter loc for: %s.", loc.name);
@@ -293,7 +272,7 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
int read_subvol = -1;
inode_t *inode = NULL;
int ret = -1;
- int spb_choice = -1;
+ int spb_subvol = -1;
local = frame->local;
inode = local->inode;
@@ -324,9 +303,9 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
local->read_attempted[read_subvol] = 1;
readfn:
if (read_subvol == -1) {
- ret = afr_inode_split_brain_choice_get(inode, this, &spb_choice);
- if ((ret == 0) && spb_choice >= 0)
- read_subvol = spb_choice;
+ ret = afr_split_brain_read_subvol_get(inode, this, frame, &spb_subvol);
+ if ((ret == 0) && spb_subvol >= 0)
+ read_subvol = spb_subvol;
}
if (read_subvol == -1) {
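
With afr_inode_split_brain_choice_get() replaced by afr_split_brain_read_subvol_get(), the fallback order for serving a read is unchanged; a standalone restatement of the decision chain (plain C, not GlusterFS code):

    /* refreshed: subvol picked by inode refresh, or -1.
     * spb_subvol: user-selected split-brain side, or -1. */
    static int
    pick_read_subvol(int refreshed, int spb_subvol)
    {
        if (refreshed >= 0)
            return refreshed;  /* normal readable copy */
        if (spb_subvol >= 0)
            return spb_subvol; /* explicit split-brain choice */
        return -1;             /* no safe copy; the fop fails */
    }
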
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 2a00f5f9bfa..a580a1584cc 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -55,7 +55,8 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
for (i = 0; i < priv->child_count; i++) {
if (source == -1) {
/* case (a) above. */
- if (replies[i].valid && replies[i].op_ret == 0) {
+ if (replies[i].valid && replies[i].op_ret == 0 &&
+ replies[i].poststat.ia_type != IA_INVAL) {
ia_type = replies[i].poststat.ia_type;
break;
}
@@ -63,7 +64,8 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
/* case (b) above. */
if (i == source)
continue;
- if (sources[i] && replies[i].valid && replies[i].op_ret == 0) {
+ if (sources[i] && replies[i].valid && replies[i].op_ret == 0 &&
+ replies[i].poststat.ia_type != IA_INVAL) {
ia_type = replies[i].poststat.ia_type;
break;
}
@@ -77,6 +79,12 @@ heal:
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid || replies[i].op_ret != 0)
continue;
+
+ if (gf_uuid_is_null(gfid) &&
+ !gf_uuid_is_null(replies[i].poststat.ia_gfid) &&
+ replies[i].poststat.ia_type == ia_type)
+ gfid = replies[i].poststat.ia_gfid;
+
if (!gf_uuid_is_null(replies[i].poststat.ia_gfid) ||
replies[i].poststat.ia_type != ia_type)
continue;
@@ -132,7 +140,7 @@ heal:
}
}
out:
- if (gfid_idx && (*gfid_idx == -1) && (ret == 0)) {
+ if (gfid_idx && (*gfid_idx == -1) && (ret == 0) && local) {
ret = -afr_final_errno(local, priv);
}
loc_wipe(&loc);
@@ -1567,7 +1575,6 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
char *accused = NULL; /* Accused others without any self-accusal */
char *pending = NULL; /* Have pending operations on others */
char *self_accused = NULL; /* Accused itself */
- int min_participants = -1;
priv = this->private;
@@ -1591,12 +1598,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
}
}
- if (type == AFR_DATA_TRANSACTION) {
- min_participants = priv->child_count;
- } else {
- min_participants = AFR_SH_MIN_PARTICIPANTS;
- }
- if (afr_success_count(replies, priv->child_count) < min_participants) {
+ if (afr_success_count(replies, priv->child_count) < priv->child_count) {
/* Treat this just like locks not being acquired */
return -ENOTCONN;
}
@@ -1657,7 +1659,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
}
}
- if (type == AFR_DATA_TRANSACTION)
+ if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION)
afr_selfheal_post_op_failure_accounting(priv, accused, sources,
locked_on);
@@ -1765,11 +1767,9 @@ afr_selfheal_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (xdata) {
local->replies[i].xdata = dict_ref(xdata);
ret = dict_get_int8(xdata, "link-count", &need_heal);
- local->replies[i].need_heal = need_heal;
- } else {
- local->replies[i].need_heal = need_heal;
}
+ local->replies[i].need_heal = need_heal;
syncbarrier_wake(&local->barrier);
return 0;
@@ -1825,6 +1825,37 @@ afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent,
return inode;
}
+static int
+afr_set_multi_dom_lock_count_request(xlator_t *this, dict_t *dict)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ char *key1 = NULL;
+ char *key2 = NULL;
+
+ priv = this->private;
+ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(this->name));
+ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(priv->sh_domain));
+
+ ret = dict_set_uint32(dict, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS, 1);
+ if (ret)
+ return ret;
+
+ sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name);
+ ret = dict_set_uint32(dict, key1, 1);
+ if (ret)
+ return ret;
+
+ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain);
+ ret = dict_set_uint32(dict, key2, 1);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
int
afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
uuid_t gfid, struct afr_reply *replies,
@@ -1851,6 +1882,11 @@ afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
return -ENOMEM;
}
+ if (afr_set_multi_dom_lock_count_request(frame->this, xattr_req)) {
+ dict_unref(xattr_req);
+ return -1;
+ }
+
loc.inode = inode_ref(inode);
gf_uuid_copy(loc.gfid, gfid);
@@ -1869,17 +1905,16 @@ int
afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
dict_t *dict = NULL;
- priv = frame->this->private;
local = frame->local;
- if (local && local->xattr_req)
+
+ if (local->xattr_req)
dict = local->xattr_req;
return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies,
- priv->child_up, dict);
+ local->child_up, dict);
}
unsigned int
@@ -2246,7 +2281,8 @@ int
afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
inode_t **link_inode, gf_boolean_t *data_selfheal,
gf_boolean_t *metadata_selfheal,
- gf_boolean_t *entry_selfheal)
+ gf_boolean_t *entry_selfheal,
+ struct afr_reply *replies_dst)
{
afr_private_t *priv = NULL;
inode_t *inode = NULL;
@@ -2382,6 +2418,8 @@ afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
ret = 0;
out:
+ if (replies && replies_dst)
+ afr_replies_copy(replies_dst, replies, priv->child_count);
if (inode)
inode_unref(inode);
if (replies)
@@ -2421,8 +2459,11 @@ afr_frame_create(xlator_t *this, int32_t *op_errno)
pid_t pid = GF_CLIENT_PID_SELF_HEALD;
frame = create_frame(this, this->ctx->pool);
- if (!frame)
+ if (!frame) {
+ if (op_errno)
+ *op_errno = ENOMEM;
return NULL;
+ }
local = AFR_FRAME_INIT(frame, (*op_errno));
if (!local) {
@@ -2498,7 +2539,7 @@ afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid)
ret = afr_selfheal_unlocked_inspect(frame, this, gfid, &inode,
&data_selfheal, &metadata_selfheal,
- &entry_selfheal);
+ &entry_selfheal, NULL);
if (ret)
goto out;
@@ -2709,3 +2750,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources,
out:
return source;
}
+
+static int
+afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->replies[i].poststat = *buf;
+ local->replies[i].preparent = *preparent;
+ local->replies[i].postparent = *postparent;
+ }
+ if (xdata) {
+ local->replies[i].xdata = dict_ref(xdata);
+ }
+
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode)
+{
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ unsigned char *mkdir_on = alloca0(priv->child_count);
+ unsigned char *lookup_on = alloca0(priv->child_count);
+ loc_t loc = {0};
+ int32_t op_errno = 0;
+ int32_t child_op_errno = 0;
+ struct iatt iatt = {0};
+ dict_t *xdata = NULL;
+ uuid_t anon_inode_gfid = {0};
+ int mkdir_count = 0;
+ int i = 0;
+
+ /*Try to mkdir everywhere and return success if the dir exists on 'child'
+ */
+
+ if (!priv->use_anon_inode) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ frame = afr_frame_create(this, &op_errno);
+ if (op_errno) {
+ goto out;
+ }
+ local = frame->local;
+ if (!local->child_up[child]) {
+ /*Other bricks may need mkdir so don't error out yet*/
+ child_op_errno = ENOTCONN;
+ }
+ gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ if (priv->anon_inode[i]) {
+ mkdir_on[i] = 0;
+ } else {
+ mkdir_on[i] = 1;
+ mkdir_count++;
+ }
+ }
+
+ if (mkdir_count == 0) {
+ *linked_inode = inode_find(this->itable, anon_inode_gfid);
+ if (*linked_inode) {
+ op_errno = 0;
+ goto out;
+ }
+ }
+
+ loc.parent = inode_ref(this->itable->root);
+ loc.name = priv->anon_inode_name;
+ loc.inode = inode_new(this->itable);
+ if (!loc.inode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true);
+ if (op_errno) {
+ goto out;
+ }
+
+ if (mkdir_count == 0) {
+ memcpy(lookup_on, local->child_up, priv->child_count);
+ goto lookup;
+ }
+
+ AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0,
+ xdata);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!mkdir_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else if (local->replies[i].op_ret < 0 &&
+ local->replies[i].op_errno == EEXIST) {
+ lookup_on[i] = 1;
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+
+ if (AFR_COUNT(lookup_on, priv->child_count) == 0) {
+ goto link;
+ }
+
+lookup:
+ AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xdata);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!lookup_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ if (gf_uuid_compare(anon_inode_gfid,
+ local->replies[i].poststat.ia_gfid) == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else {
+ if (i == child)
+ child_op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA,
+ "%s has gfid: %s", priv->anon_inode_name,
+ uuid_utoa(local->replies[i].poststat.ia_gfid));
+ }
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+link:
+ if (!gf_uuid_is_null(iatt.ia_gfid)) {
+ *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt);
+ if (*linked_inode) {
+ op_errno = 0;
+ inode_lookup(*linked_inode);
+ } else {
+ op_errno = ENOMEM;
+ }
+ goto out;
+ }
+
+out:
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+ /*child_op_errno takes precedence*/
+ if (child_op_errno == 0) {
+ child_op_errno = op_errno;
+ }
+
+ if (child_op_errno && *linked_inode) {
+ inode_unref(*linked_inode);
+ *linked_inode = NULL;
+ }
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return -child_op_errno;
+}
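
afr_anon_inode_create() returns negative errno and, on success, hands back a linked, looked-up inode for the per-volume anonymous-inode directory. A usage sketch modelled on the callers later in this patch (hypothetical call site, error handling trimmed):

    inode_t *anon_dir = NULL;
    int ret = afr_anon_inode_create(this, child, &anon_dir);
    if (ret < 0)
        goto out;          /* -errno; dir not usable on 'child' */
    /* ... rename the stale entry to live under anon_dir ... */
    inode_unref(anon_dir); /* drop the reference when done */
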
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 18a033467ba..37bcc2b3f9e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -225,24 +225,40 @@ __afr_selfheal_data_read_write(call_frame_t *frame, xlator_t *this, fd_t *fd,
return ret;
}
+static gf_boolean_t
+afr_source_sinks_locked(xlator_t *this, unsigned char *locked_on, int source,
+ unsigned char *healed_sinks)
+{
+ afr_private_t *priv = this->private;
+ int i = 0;
+
+ if (!locked_on[source])
+ return _gf_false;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (healed_sinks[i] && locked_on[i])
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
static int
afr_selfheal_data_block(call_frame_t *frame, xlator_t *this, fd_t *fd,
int source, unsigned char *healed_sinks, off_t offset,
size_t size, int type, struct afr_reply *replies)
{
int ret = -1;
- int sink_count = 0;
afr_private_t *priv = NULL;
unsigned char *data_lock = NULL;
priv = this->private;
- sink_count = AFR_COUNT(healed_sinks, priv->child_count);
data_lock = alloca0(priv->child_count);
ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, offset, size,
data_lock);
{
- if (ret < sink_count) {
+ if (!afr_source_sinks_locked(this, data_lock, source, healed_sinks)) {
ret = -ENOTCONN;
goto unlock;
}
@@ -324,6 +340,9 @@ afr_selfheal_data_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source,
call_frame_t *iter_frame = NULL;
unsigned char arbiter_sink_status = 0;
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "performing data selfheal on %s", uuid_utoa(fd->inode->gfid));
+
priv = this->private;
if (priv->arbiter_count) {
arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX];
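
afr_source_sinks_locked() tightens the old count-based check: it is no longer enough that some N bricks were locked, the source itself plus at least one healed sink must be among them. A standalone restatement of the criterion (plain C):

    #include <stdbool.h>

    /* Heal a block only if the source brick is locked and at least
     * one healed sink is locked; a bare lock count can be satisfied
     * by the wrong set of bricks. */
    static bool
    source_and_sink_locked(const unsigned char *locked,
                           const unsigned char *sinks, int source, int n)
    {
        int i;

        if (!locked[source])
            return false;
        for (i = 0; i < n; i++)
            if (sinks[i] && locked[i])
                return true;
        return false;
    }
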
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 3e0d17a1b9b..64893f441e3 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -16,54 +16,170 @@
#include <glusterfs/syncop-utils.h>
#include <glusterfs/events.h>
-static int
-afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
- inode_t *inode, int child, struct afr_reply *replies)
+int
+afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child,
+ struct afr_reply *replies,
+ gf_boolean_t *anon_inode)
{
afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
xlator_t *subvol = NULL;
int ret = 0;
+ int i = 0;
+ char g[64] = {0};
+ unsigned char *lookup_success = NULL;
+ call_frame_t *frame = NULL;
+ loc_t loc2 = {
+ 0,
+ };
loc_t loc = {
0,
};
- char g[64];
priv = this->private;
-
subvol = priv->children[child];
+ lookup_success = alloca0(priv->child_count);
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (replies[child].poststat.ia_type == IA_IFDIR) {
+ /* This directory may have sub-directory hierarchy which may need to
+ * be preserved for subsequent heals. So unconditionally move the
+ * directory to anonymous-inode directory*/
+ *anon_inode = _gf_true;
+ goto anon_inode;
+ }
+
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid);
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ lookup_success[i] = 1;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ ret = -local->replies[i].op_errno;
+ }
+ }
+
+ if (priv->quorum_count) {
+ if (afr_has_quorum(lookup_success, this, NULL)) {
+ *anon_inode = _gf_true;
+ }
+ } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) {
+ *anon_inode = _gf_true;
+ } else if (ret) {
+ goto out;
+ }
+
+anon_inode:
+ if (!*anon_inode) {
+ ret = 0;
+ goto out;
+ }
loc.parent = inode_ref(dir);
gf_uuid_copy(loc.pargfid, dir->gfid);
loc.name = name;
- loc.inode = inode_ref(inode);
- if (replies[child].valid && replies[child].op_ret == 0) {
- switch (replies[child].poststat.ia_type) {
- case IA_IFDIR:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid),
- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
- subvol->name);
- ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
- break;
- default:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
- subvol->name);
- ret = syncop_unlink(subvol, &loc, NULL, NULL);
- break;
- }
+ ret = afr_anon_inode_create(this, child, &loc2.parent);
+ if (ret < 0)
+ goto out;
+
+ loc2.name = g;
+ ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s failed",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s successful",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
}
+out:
loc_wipe(&loc);
+ loc_wipe(&loc2);
+ if (frame) {
+ AFR_STACK_DESTROY(frame);
+ }
return ret;
}
int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies)
+{
+ char g[64] = {0};
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ gf_boolean_t anon_inode = _gf_false;
+
+ priv = this->private;
+ subvol = priv->children[child];
+
+ if ((!replies[child].valid) || (replies[child].op_ret < 0)) {
+ /*Nothing to do*/
+ ret = 0;
+ goto out;
+ }
+
+ if (priv->use_anon_inode) {
+ ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child,
+ replies, &anon_inode);
+ if (ret < 0 || anon_inode)
+ goto out;
+ }
+
+ loc.parent = inode_ref(dir);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ loc.name = name;
+ switch (replies[child].poststat.ia_type) {
+ case IA_IFDIR:
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name,
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
+ name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+ break;
+ }
+
+out:
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
unsigned char *sources, inode_t *dir,
const char *name, inode_t *inode,
@@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
loc_t srcloc = {
0,
};
+ loc_t anonloc = {
+ 0,
+ };
xlator_t *this = frame->this;
afr_private_t *priv = NULL;
dict_t *xdata = NULL;
@@ -86,15 +205,18 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
0,
};
unsigned char *newentry = NULL;
+ char iatt_uuid_str[64] = {0};
+ char dir_uuid_str[64] = {0};
priv = this->private;
iatt = &replies[source].poststat;
+ uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str);
if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED,
"Invalid ia_type (%d) or gfid(%s). source brick=%d, "
"pargfid=%s, name=%s",
- iatt->ia_type, uuid_utoa(iatt->ia_gfid), source,
- uuid_utoa(dir->gfid), name);
+ iatt->ia_type, iatt_uuid_str, source,
+ uuid_utoa_r(dir->gfid, dir_uuid_str), name);
ret = -EINVAL;
goto out;
}
@@ -119,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
srcloc.inode = inode_ref(inode);
gf_uuid_copy(srcloc.gfid, iatt->ia_gfid);
- if (iatt->ia_type != IA_IFDIR)
- ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
- if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) {
+ ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
+ if (ret == -ENOENT || ret == -ESTALE) {
newentry[dst] = 1;
ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies,
sources, newentry);
if (ret)
goto out;
+ } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) {
+ // Try rename from hidden directory
+ ret = afr_anon_inode_create(this, dst, &anonloc.parent);
+ if (ret < 0)
+ goto out;
+ anonloc.inode = inode_ref(inode);
+ anonloc.name = iatt_uuid_str;
+ ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = -1; /*This sets 'mismatch' to true*/
+ goto out;
}
mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
@@ -165,6 +297,7 @@ out:
GF_FREE(linkname);
loc_wipe(&loc);
loc_wipe(&srcloc);
+ loc_wipe(&anonloc);
return ret;
}
@@ -246,6 +379,19 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this,
if (replies[i].op_ret != 0)
continue;
+ if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
+ continue;
+
+ if (replies[i].poststat.ia_type == IA_INVAL)
+ continue;
+
+ if (ia_type == IA_INVAL || gf_uuid_is_null(gfid)) {
+ src_idx = i;
+ ia_type = replies[src_idx].poststat.ia_type;
+ gfid = &replies[src_idx].poststat.ia_gfid;
+ continue;
+ }
+
if (gf_uuid_compare(gfid, replies[i].poststat.ia_gfid) &&
(ia_type == replies[i].poststat.ia_type)) {
ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
@@ -466,6 +612,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
afr_private_t *priv = NULL;
int source = -1;
int sources_count = 0;
+ int i = 0;
priv = this->private;
@@ -479,6 +626,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
}
source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION);
+
+ /*If the selected source does not blame any other brick, then mark
+ * everything as sink to trigger conservative merge.
+ */
+ if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_on[i]) {
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
+ }
+ return -1;
+ }
+
return source;
}
@@ -549,6 +710,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
+ if (afr_is_private_directory(priv, fd->inode->gfid, name,
+ GF_CLIENT_PID_SELF_HEALD)) {
+ return 0;
+ }
+
xattr = dict_new();
if (!xattr)
return -ENOMEM;
@@ -569,7 +735,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
gf_msg_debug(this->name, 0,
"%s: Skipping "
"entry self-heal as only %d sub-volumes "
@@ -597,7 +763,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
replies);
if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) {
- ret = afr_shd_index_purge(subvol, parent_idx_inode, name,
+ ret = afr_shd_entry_purge(subvol, parent_idx_inode, name,
inode->ia_type);
/* Why is ret force-set to 0? We do not care about
* index purge failing for full heal as it is quite
@@ -727,10 +893,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd,
if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
- if (__is_root_gfid(fd->inode->gfid) &&
- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR))
- continue;
-
ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name,
loc.inode, subvol,
local->need_full_crawl);
@@ -793,7 +955,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
/* The name indices under the pgfid index dir are guaranteed
* to be regular files. Hence the hardcoding.
*/
- afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
ret = 0;
goto out;
}
@@ -832,6 +994,8 @@ afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
subvol = priv->children[subvol_idx];
args.frame = afr_copy_frame(frame);
+ if (!args.frame)
+ goto out;
args.xl = this;
/* args.heal_fd represents the fd associated with the original directory
* on which entry heal is being attempted.
@@ -850,9 +1014,10 @@ afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
* do not treat heal as failure.
*/
if (is_src)
- return -errno;
+ ret = -errno;
else
- return 0;
+ ret = 0;
+ goto out;
}
ret = syncop_dir_scan(subvol, &loc, GF_CLIENT_PID_SELF_HEALD, &args,
@@ -862,7 +1027,9 @@ afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
if (args.mismatch == _gf_true)
ret = -1;
-
+out:
+ if (args.frame)
+ AFR_STACK_DESTROY(args.frame);
return ret;
}
@@ -958,7 +1125,7 @@ __afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
data_lock);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
gf_msg_debug(this->name, 0,
"%s: Skipping "
"entry self-heal as only %d sub-volumes could "
@@ -1082,7 +1249,7 @@ afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain,
NULL, locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
gf_msg_debug(this->name, 0,
"%s: Skipping "
"entry self-heal as only %d sub-volumes could "
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index ba43341d9c0..03f43bad16e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -190,6 +190,59 @@ out:
return ret;
}
+static int
+__afr_selfheal_metadata_mark_pending_xattrs(call_frame_t *frame, xlator_t *this,
+ inode_t *inode,
+ struct afr_reply *replies,
+ unsigned char *sources)
+{
+ int ret = 0;
+ int i = 0;
+ int m_idx = 0;
+ afr_private_t *priv = NULL;
+ int raw[AFR_NUM_CHANGE_LOGS] = {0};
+ dict_t *xattr = NULL;
+
+ priv = this->private;
+ m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION);
+ raw[m_idx] = 1;
+
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i])
+ continue;
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ ret = afr_selfheal_post_op(frame, this, inode, i, xattr, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_SELF_HEAL_INFO,
+ "Failed to set pending metadata xattr on child %d for %s", i,
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+ }
+
+ afr_replies_wipe(replies, priv->child_count);
+ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+ return ret;
+}
+
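
The function above handles the case where every brick counts as a source yet their metadata differ: it plants a pending metadata mark from the sources against the healed sinks, so the normal changelog-driven heal path can finish the job. A sketch of the payload it builds; the change-log index layout (0=data, 1=metadata, 2=entry) is standard AFR background rather than something this hunk states:

    int raw[AFR_NUM_CHANGE_LOGS] = {0}; /* one counter per log type */
    int m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION);

    raw[m_idx] = 1; /* one pending metadata operation */
    /* dict_set_static_bin() attaches this array under each sink's
     * pending key; afr_selfheal_post_op() then applies it on every
     * source via xattrop. */
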
/*
* Look for mismatching uid/gid or mode or user xattrs even if
* AFR xattrs don't say so, and pick one arbitrarily as winner. */
@@ -210,6 +263,7 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this,
};
int source = -1;
int sources_count = 0;
+ int ret = 0;
priv = this->private;
@@ -300,7 +354,13 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this,
healed_sinks[i] = 1;
}
}
-
+ if ((sources_count == priv->child_count) && (source > -1) &&
+ (AFR_COUNT(healed_sinks, priv->child_count) != 0)) {
+ ret = __afr_selfheal_metadata_mark_pending_xattrs(frame, this, inode,
+ replies, sources);
+ if (ret < 0)
+ return ret;
+ }
out:
afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
return source;
@@ -398,7 +458,7 @@ afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
data_lock);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
ret = -ENOTCONN;
goto unlock;
}
@@ -421,12 +481,8 @@ afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode)
if (ret)
goto unlock;
- /* Restore atime/mtime for files that don't need data heal as
- * restoring timestamps happens only as a part of data-heal.
- */
- if (!IA_ISREG(locked_replies[source].poststat.ia_type))
- afr_selfheal_restore_time(frame, this, inode, source, healed_sinks,
- locked_replies);
+ afr_selfheal_restore_time(frame, this, inode, source, healed_sinks,
+ locked_replies);
ret = afr_selfheal_undo_pending(
frame, this, inode, sources, sinks, healed_sinks, undid_pending,
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index 36640b5456b..834aac86d48 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
const char *bname, inode_t *inode,
struct afr_reply *replies)
{
- loc_t loc = {
- 0,
- };
int i = 0;
afr_private_t *priv = NULL;
- char g[64];
int ret = 0;
priv = this->private;
- loc.parent = inode_ref(parent);
- gf_uuid_copy(loc.pargfid, pargfid);
- loc.name = bname;
- loc.inode = inode_ref(inode);
-
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid)
continue;
@@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
if (replies[i].op_ret)
continue;
- switch (replies[i].poststat.ia_type) {
- case IA_IFDIR:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid),
- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
- priv->children[i]->name);
-
- ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL);
- break;
- default:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid),
- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
- priv->children[i]->name);
-
- ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL);
- break;
- }
+ ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i,
+ replies);
}
- loc_wipe(&loc);
-
return ret;
}
@@ -381,7 +352,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
ret = __afr_selfheal_assign_gfid(this, parent, pargfid, bname, inode,
replies, gfid, locked_on, source, sources,
is_gfid_absent, &gfid_idx);
- if (ret)
+ if (ret || (gfid_idx < 0))
return ret;
ret = __afr_selfheal_name_impunge(frame, this, parent, pargfid, bname,
@@ -514,7 +485,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname,
locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
ret = -ENOTCONN;
goto unlock;
}
@@ -560,13 +531,15 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this,
struct afr_reply *replies = NULL;
inode_t *inode = NULL;
int first_idx = -1;
+ afr_local_t *local = NULL;
priv = this->private;
+ local = frame->local;
replies = alloca0(sizeof(*replies) * priv->child_count);
inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies,
- priv->child_up, NULL);
+ local->child_up, NULL);
if (!inode)
return -ENOMEM;
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 703f80e05cb..48e6dbcfb18 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -11,8 +11,6 @@
#ifndef _AFR_SELFHEAL_H
#define _AFR_SELFHEAL_H
-#define AFR_SH_MIN_PARTICIPANTS 2
-
/* Perform fop on all UP subvolumes and wait for all callbacks to return */
#define AFR_ONALL(frame, rfn, fop, args...) \
@@ -20,9 +18,8 @@
afr_local_t *__local = frame->local; \
afr_private_t *__priv = frame->this->private; \
int __i = 0, __count = 0; \
- unsigned char *__child_up = NULL; \
+ unsigned char *__child_up = alloca(__priv->child_count); \
\
- __child_up = alloca0(__priv->child_count); \
memcpy(__child_up, __priv->child_up, \
sizeof(*__child_up) * __priv->child_count); \
__count = AFR_COUNT(__child_up, __priv->child_count); \
@@ -48,13 +45,16 @@
afr_local_t *__local = frame->local; \
afr_private_t *__priv = frame->this->private; \
int __i = 0; \
- int __count = AFR_COUNT(list, __priv->child_count); \
+ int __count = 0; \
+ unsigned char *__list = alloca(__priv->child_count); \
\
+ memcpy(__list, list, sizeof(*__list) * __priv->child_count); \
+ __count = AFR_COUNT(__list, __priv->child_count); \
__local->barrier.waitfor = __count; \
afr_local_replies_wipe(__local, __priv); \
\
for (__i = 0; __i < __priv->child_count; __i++) { \
- if (!list[__i]) \
+ if (!__list[__i]) \
continue; \
STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \
__priv->children[__i], \
@@ -83,9 +83,9 @@
#define ALLOC_MATRIX(n, type) \
({ \
- type **__ptr = NULL; \
int __i; \
- __ptr = alloca0(n * sizeof(type *)); \
+ type **__ptr = alloca(n * sizeof(type *)); \
+ \
for (__i = 0; __i < n; __i++) \
__ptr[__i] = alloca0(n * sizeof(type)); \
__ptr; \
@@ -326,7 +326,8 @@ int
afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
inode_t **link_inode, gf_boolean_t *data_selfheal,
gf_boolean_t *metadata_selfheal,
- gf_boolean_t *entry_selfheal);
+ gf_boolean_t *entry_selfheal,
+ struct afr_reply *replies);
int
afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid);
@@ -368,4 +369,9 @@ gf_boolean_t
afr_is_file_empty_on_all_children(afr_private_t *priv,
struct afr_reply *replies);
+int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies);
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode);
#endif /* !_AFR_SELFHEAL_H */
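
The AFR_ONLIST change is subtle: the macro now snapshots the caller's list onto the stack before the first STACK_WIND, so a mask that lives inside local (and may be rewritten once callbacks start completing) cannot change mid-loop. The same pattern in standalone C:

    #include <string.h>
    #include <alloca.h>

    static void
    wind_on_list(const unsigned char *list, int count,
                 void (*wind_one)(int child))
    {
        unsigned char *snap = alloca(count); /* freed on return */
        int i;

        memcpy(snap, list, count); /* snapshot the selection mask */
        for (i = 0; i < count; i++)
            if (snap[i])
                wind_one(i); /* callbacks cannot alter our copy */
    }
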
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 824f8c0127c..109fd4b7421 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -94,7 +94,7 @@ __afr_shd_healer_wait(struct subvol_healer *healer)
priv = healer->this->private;
disabled_loop:
- wait_till.tv_sec = time(NULL) + priv->shd.timeout;
+ wait_till.tv_sec = gf_time() + priv->shd.timeout;
while (!healer->rerun) {
ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
@@ -222,7 +222,7 @@ out:
}
int
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
ia_type_t type)
{
int ret = 0;
@@ -371,8 +371,9 @@ afr_shd_sweep_prepare(struct subvol_healer *healer)
event->split_brain_count = 0;
event->heal_failed_count = 0;
- time(&event->start_time);
+ event->start_time = gf_time();
event->end_time = 0;
+ _mask_cancellation();
}
void
@@ -385,8 +386,8 @@ afr_shd_sweep_done(struct subvol_healer *healer)
event = &healer->crawl_event;
shd = &(((afr_private_t *)healer->this->private)->shd);
- time(&event->end_time);
- history = memdup(event, sizeof(*event));
+ event->end_time = gf_time();
+ history = gf_memdup(event, sizeof(*event));
event->start_time = 0;
if (!history)
@@ -394,6 +395,7 @@ afr_shd_sweep_done(struct subvol_healer *healer)
if (eh_save_history(shd->statistics[healer->subvol], history) < 0)
GF_FREE(history);
+ _unmask_cancellation();
}
int
@@ -422,7 +424,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
ret = afr_shd_selfheal(healer, healer->subvol, gfid);
if (ret == -ENOENT || ret == -ESTALE)
- afr_shd_index_purge(subvol, parent->inode, entry->d_name, val);
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val);
if (ret == 2)
/* If bricks crashed in pre-op after creating indices/xattrop
@@ -522,6 +524,11 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
afr_private_t *priv = NULL;
priv = this->private;
+
+ if (this->cleanup_starting) {
+ return -ENOTCONN;
+ }
+
if (!priv->shd.enabled)
return -EBUSY;
@@ -794,6 +801,218 @@ afr_bricks_available_for_heal(afr_private_t *priv)
return _gf_true;
}
+static gf_boolean_t
+afr_shd_ta_needs_heal(xlator_t *this, struct subvol_healer *healer)
+{
+ dict_t *xdata = NULL;
+ afr_private_t *priv = NULL;
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ int i = 0;
+ gf_boolean_t need_heal = _gf_false;
+
+ priv = this->private;
+
+ ret = afr_shd_fill_ta_loc(this, &loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.", loc.name);
+ healer->rerun = 1;
+ goto out;
+ }
+
+ if (_afr_shd_ta_get_xattrs(this, &loc, &xdata)) {
+ healer->rerun = 1;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (afr_ta_dict_contains_pending_xattr(xdata, priv, i)) {
+ need_heal = _gf_true;
+ break;
+ }
+ }
+
+out:
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+
+ return need_heal;
+}
+
+static int
+afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
+{
+ struct subvol_healer *healer = data;
+ afr_private_t *priv = healer->this->private;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+ loc_t loc = {0};
+ int count = 0;
+ int i = 0;
+ int op_errno = 0;
+ struct iatt *iatt = NULL;
+ gf_boolean_t multiple_links = _gf_false;
+ unsigned char *gfid_present = alloca0(priv->child_count);
+ unsigned char *entry_present = alloca0(priv->child_count);
+ char *type = "file";
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
+ gf_msg_debug(healer->this->name, 0,
+ "Not all bricks are up. Skipping "
+ "cleanup of %s on %s",
+ entry->d_name, subvol->name);
+ ret = 0;
+ goto out;
+ }
+
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = gf_uuid_parse(entry->d_name, loc.gfid);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ gfid_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ if (iatt->ia_type == IA_IFDIR) {
+ type = "dir";
+ }
+
+ if (i == healer->subvol) {
+ if (local->replies[i].poststat.ia_nlink > 1) {
+ multiple_links = _gf_true;
+ }
+ }
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have a complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ /*Inode is deleted from subvol*/
+ if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) {
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type,
+ priv->anon_inode_name, entry->d_name, subvol->name);
+ ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name,
+ iatt->ia_type);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = 0;
+ } else if (count > 1) {
+ loc_wipe(&loc);
+ loc.parent = inode_ref(parent->inode);
+ loc.name = entry->d_name;
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup,
+ &loc, NULL);
+ count = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ entry_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have a complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (gfid_present[i] && !entry_present[i]) {
+ /*Entry is not anonymous on at least one subvol*/
+ gf_msg_debug(healer->this->name, 0,
+ "Valid entry present on %s "
+ "Skipping cleanup of %s on %s",
+ priv->children[i]->name, entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging %s %s/%s on all subvols", type, priv->anon_inode_name,
+ entry->d_name);
+ ret = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent,
+ entry->d_name, iatt->ia_type);
+ if (op_errno != ENOENT && op_errno != ESTALE) {
+ ret |= -op_errno;
+ }
+ }
+ }
+
+out:
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return ret;
+}
+
+static void
+afr_cleanup_anon_inode_dir(struct subvol_healer *healer)
+{
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ afr_private_t *priv = healer->this->private;
+ loc_t loc = {0};
+
+ ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode);
+ if (ret)
+ goto out;
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer,
+ afr_shd_anon_inode_cleaner, NULL,
+ priv->shd.max_threads, priv->shd.wait_qlength);
+out:
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return;
+}
+
void *
afr_shd_index_healer(void *data)
{
@@ -820,7 +1039,8 @@ afr_shd_index_healer(void *data)
priv->local[healer->subvol] = healer->local;
if (priv->thin_arbiter_count) {
- afr_shd_ta_get_xattrs(this, &loc, healer, &pre_crawl_xdata);
+ if (afr_shd_ta_needs_heal(this, healer))
+ afr_shd_ta_get_xattrs(this, &loc, healer, &pre_crawl_xdata);
}
do {
@@ -850,6 +1070,10 @@ afr_shd_index_healer(void *data)
sleep(1);
} while (ret > 0);
+ if (ret == 0) {
+ afr_cleanup_anon_inode_dir(healer);
+ }
+
if (ret == 0 && pre_crawl_xdata &&
!healer->crawl_event.heal_failed_count) {
afr_shd_ta_check_and_unset_xattrs(this, &loc, healer,
@@ -974,7 +1198,7 @@ afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
{
int ret = 0;
uint64_t count = 0;
- char key[256] = {0};
+ char key[128] = {0};
int keylen = 0;
char suffix[64] = {0};
int xl_id = 0;
@@ -1099,9 +1323,9 @@ afr_shd_dict_add_path(xlator_t *this, dict_t *output, int child, char *path,
{
int ret = -1;
uint64_t count = 0;
- char key[256] = {0};
+ char key[64] = {0};
int keylen = 0;
- char xl_id_child_str[64] = {0};
+ char xl_id_child_str[32] = {0};
int xl_id = 0;
ret = dict_get_int32(output, this->name, &xl_id);
@@ -1324,19 +1548,40 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
int op_ret = 0;
uint64_t cnt = 0;
+#define AFR_SET_DICT_AND_LOG(name, output, key, keylen, dict_str, \
+ dict_str_len) \
+ { \
+ int ret; \
+ \
+ ret = dict_set_nstrn(output, key, keylen, dict_str, dict_str_len); \
+ if (ret) { \
+ gf_smsg(name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, \
+ "key=%s", key, "value=%s", dict_str, NULL); \
+ } \
+ }
+
priv = this->private;
shd = &priv->shd;
ret = dict_get_int32_sizen(input, "xl-op", (int32_t *)&op);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "key=xl-op", NULL);
goto out;
+ }
this_name_len = strlen(this->name);
ret = dict_get_int32n(input, this->name, this_name_len, &xl_id);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "key=%s", this->name, NULL);
goto out;
+ }
ret = dict_set_int32n(output, this->name, this_name_len, xl_id);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "key=%s", this->name, NULL);
goto out;
+ }
switch (op) {
case GF_SHD_OP_HEAL_INDEX:
op_ret = 0;
@@ -1346,23 +1591,30 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
if (!priv->child_up[i]) {
- ret = dict_set_nstrn(output, key, keylen,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
SBRICK_NOT_CONNECTED,
SLEN(SBRICK_NOT_CONNECTED));
op_ret = -1;
} else if (AFR_COUNT(priv->child_up, priv->child_count) < 2) {
- ret = dict_set_nstrn(output, key, keylen,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
SLESS_THAN2_BRICKS_in_REP,
SLEN(SLESS_THAN2_BRICKS_in_REP));
op_ret = -1;
} else if (!afr_shd_is_subvol_local(this, healer->subvol)) {
- ret = dict_set_nstrn(output, key, keylen, SBRICK_IS_REMOTE,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_IS_REMOTE,
SLEN(SBRICK_IS_REMOTE));
} else {
- ret = dict_set_nstrn(output, key, keylen,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
SSTARTED_SELF_HEAL,
SLEN(SSTARTED_SELF_HEAL));
- afr_shd_index_healer_spawn(this, i);
+
+ ret = afr_shd_index_healer_spawn(this, i);
+
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_HEALER_SPAWN_FAILED, NULL);
+ }
}
}
break;
@@ -1374,21 +1626,28 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
if (!priv->child_up[i]) {
- ret = dict_set_nstrn(output, key, keylen,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
SBRICK_NOT_CONNECTED,
SLEN(SBRICK_NOT_CONNECTED));
} else if (AFR_COUNT(priv->child_up, priv->child_count) < 2) {
- ret = dict_set_nstrn(output, key, keylen,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
SLESS_THAN2_BRICKS_in_REP,
SLEN(SLESS_THAN2_BRICKS_in_REP));
} else if (!afr_shd_is_subvol_local(this, healer->subvol)) {
- ret = dict_set_nstrn(output, key, keylen, SBRICK_IS_REMOTE,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_IS_REMOTE,
SLEN(SBRICK_IS_REMOTE));
} else {
- ret = dict_set_nstrn(output, key, keylen,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
SSTARTED_SELF_HEAL,
SLEN(SSTARTED_SELF_HEAL));
- afr_shd_full_healer_spawn(this, i);
+
+ ret = afr_shd_full_healer_spawn(this, i);
+
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_HEALER_SPAWN_FAILED, NULL);
+ }
op_ret = 0;
}
}
@@ -1396,24 +1655,25 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
case GF_SHD_OP_INDEX_SUMMARY:
/* this case has been handled in glfs-heal.c */
break;
- case GF_SHD_OP_HEALED_FILES:
- case GF_SHD_OP_HEAL_FAILED_FILES:
- for (i = 0; i < priv->child_count; i++) {
- keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
- ret = dict_set_nstrn(output, key, keylen, SOP_NOT_SUPPORTED,
- SLEN(SOP_NOT_SUPPORTED));
- }
- break;
case GF_SHD_OP_SPLIT_BRAIN_FILES:
eh_dump(shd->split_brain, output, afr_add_shd_event);
break;
case GF_SHD_OP_STATISTICS:
for (i = 0; i < priv->child_count; i++) {
eh_dump(shd->statistics[i], output, afr_add_crawl_event);
- afr_shd_dict_add_crawl_event(
+ ret = afr_shd_dict_add_crawl_event(
this, output, &shd->index_healers[i].crawl_event);
- afr_shd_dict_add_crawl_event(this, output,
- &shd->full_healers[i].crawl_event);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, NULL);
+ }
+
+ ret = afr_shd_dict_add_crawl_event(
+ this, output, &shd->full_healers[i].crawl_event);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, NULL);
+ }
}
break;
case GF_SHD_OP_STATISTICS_HEAL_COUNT:
@@ -1424,7 +1684,7 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
if (!priv->child_up[i]) {
keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id,
i);
- ret = dict_set_nstrn(output, key, keylen,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
SBRICK_NOT_CONNECTED,
SLEN(SBRICK_NOT_CONNECTED));
} else {
@@ -1433,6 +1693,10 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
if (ret == 0) {
ret = dict_set_uint64(output, key, cnt);
}
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_DICT_SET_FAILED, NULL);
+ }
op_ret = 0;
}
}
@@ -1440,11 +1704,13 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
break;
default:
- gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG,
- "Unknown set op %d", op);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "op=%d",
+ op, NULL);
break;
}
out:
dict_deln(output, this->name, this_name_len);
return op_ret;
+
+#undef AFR_SET_DICT_AND_LOG
}
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 19905394540..18db728ea7b 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -14,12 +14,11 @@
#include <pthread.h>
typedef struct {
- int child;
char *path;
+ int child;
} shd_event_t;
typedef struct {
- int child;
uint64_t healed_count;
uint64_t split_brain_count;
uint64_t heal_failed_count;
@@ -31,32 +30,33 @@ typedef struct {
crawler is in progress */
time_t end_time;
char *crawl_type;
+ int child;
} crawl_event_t;
struct subvol_healer {
xlator_t *this;
- int subvol;
- gf_boolean_t local;
- gf_boolean_t running;
- gf_boolean_t rerun;
crawl_event_t crawl_event;
pthread_mutex_t mutex;
pthread_cond_t cond;
pthread_t thread;
+ int subvol;
+ gf_boolean_t local;
+ gf_boolean_t running;
+ gf_boolean_t rerun;
};
typedef struct {
- gf_boolean_t iamshd;
- gf_boolean_t enabled;
- int timeout;
struct subvol_healer *index_healers;
struct subvol_healer *full_healers;
eh_t *split_brain;
eh_t **statistics;
+ int timeout;
uint32_t max_threads;
uint32_t wait_qlength;
uint32_t halo_max_latency_msec;
+ gf_boolean_t iamshd;
+ gf_boolean_t enabled;
} afr_self_heald_t;
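The member reshuffling in this header (and in afr.h further down) is a packing cleanup: pointers and 64-bit fields move to the front, int/uint32_t fields next, booleans last, which trims alignment padding on LP64 targets. A toy illustration of the effect, assuming 8-byte pointers and a 4-byte gf_boolean_t (it is an enum):

    struct before { gf_boolean_t b1; void *p; gf_boolean_t b2; void *q; };
    /* 4 + 4 pad + 8 + 4 + 4 pad + 8 = 32 bytes */
    struct after  { void *p; void *q; gf_boolean_t b1; gf_boolean_t b2; };
    /* 8 + 8 + 4 + 4 = 24 bytes */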
int
@@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid,
char **path_p);
int
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
ia_type_t type);
#endif /* !_AFR_SELF_HEALD_H */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index bdc4bfc0b10..a51f79b1f43 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -124,9 +124,9 @@ afr_release_notify_lock_for_ta(void *opaque)
this = (xlator_t *)opaque;
priv = this->private;
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate loc for thin-arbiter.");
goto out;
}
@@ -521,42 +521,6 @@ afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this)
local->transaction.pre_op_sources[j] = 0;
}
-gf_boolean_t
-afr_has_arbiter_fop_cbk_quorum(call_frame_t *frame)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- xlator_t *this = NULL;
- gf_boolean_t fop_failed = _gf_false;
- unsigned char *pre_op_sources = NULL;
- int i = 0;
-
- local = frame->local;
- this = frame->this;
- priv = this->private;
- pre_op_sources = local->transaction.pre_op_sources;
-
- /* If the fop failed on the brick, it is not a source. */
- for (i = 0; i < priv->child_count; i++)
- if (local->transaction.failed_subvols[i])
- pre_op_sources[i] = 0;
-
- switch (AFR_COUNT(pre_op_sources, priv->child_count)) {
- case 1:
- if (pre_op_sources[ARBITER_BRICK_INDEX])
- fop_failed = _gf_true;
- break;
- case 0:
- fop_failed = _gf_true;
- break;
- }
-
- if (fop_failed)
- return _gf_false;
-
- return _gf_true;
-}
-
void
afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this)
{
@@ -873,7 +837,7 @@ afr_has_quorum(unsigned char *subvols, xlator_t *this, call_frame_t *frame)
priv = this->private;
up_children_count = AFR_COUNT(subvols, priv->child_count);
- if (afr_lookup_has_quorum(frame, this, subvols))
+ if (afr_lookup_has_quorum(frame, up_children_count))
return _gf_true;
if (priv->quorum_count == AFR_QUORUM_AUTO) {
@@ -971,12 +935,8 @@ afr_need_dirty_marking(call_frame_t *frame, xlator_t *this)
priv->child_count)
return _gf_false;
- if (priv->arbiter_count) {
- if (!afr_has_arbiter_fop_cbk_quorum(frame))
- need_dirty = _gf_true;
- } else if (!afr_has_fop_cbk_quorum(frame)) {
+ if (!afr_has_fop_cbk_quorum(frame))
need_dirty = _gf_true;
- }
return need_dirty;
}
@@ -1026,12 +986,8 @@ afr_handle_quorum(call_frame_t *frame, xlator_t *this)
* no split-brain with the fix. The problem is eliminated completely.
*/
- if (priv->arbiter_count) {
- if (afr_has_arbiter_fop_cbk_quorum(frame))
- return;
- } else if (afr_has_fop_cbk_quorum(frame)) {
+ if (afr_has_fop_cbk_quorum(frame))
return;
- }
if (afr_need_dirty_marking(frame, this))
goto set_response;
@@ -1073,7 +1029,7 @@ set_response:
}
int
-afr_fill_ta_loc(xlator_t *this, loc_t *loc)
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop)
{
afr_private_t *priv = NULL;
@@ -1081,6 +1037,11 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc)
loc->parent = inode_ref(priv->root_inode);
gf_uuid_copy(loc->pargfid, loc->parent->gfid);
loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX];
+ if (is_gfid_based_fop && gf_uuid_is_null(priv->ta_gfid)) {
+        /* Except afr_ta_id_file_check(), which is path-based, all other
+         * gluster FOPs need the gfid. */
+ return -EINVAL;
+ }
gf_uuid_copy(loc->gfid, priv->ta_gfid);
loc->inode = inode_new(loc->parent->table);
if (!loc->inode) {
@@ -1090,86 +1051,6 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc)
return 0;
}
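With the new is_gfid_based_fop flag, afr_fill_ta_loc() now fails with -EINVAL when the thin-arbiter gfid has not been resolved yet, instead of copying a null gfid into the loc. The caller pattern, mirroring the two call sites updated in this file:

    loc_t loc = {
        0,
    };

    ret = afr_fill_ta_loc(this, &loc, _gf_true);
    if (ret) {
        gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
               "Failed to populate loc for thin-arbiter.");
        goto out;
    }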
-int
-afr_changelog_thin_arbiter_post_op(xlator_t *this, afr_local_t *local)
-{
- int ret = 0;
- afr_private_t *priv = NULL;
- dict_t *xattr = NULL;
- int failed_count = 0;
- struct gf_flock flock = {
- 0,
- };
- loc_t loc = {
- 0,
- };
- int i = 0;
-
- priv = this->private;
- if (!priv->thin_arbiter_count)
- return 0;
-
- failed_count = AFR_COUNT(local->transaction.failed_subvols,
- priv->child_count);
- if (!failed_count)
- return 0;
-
- GF_ASSERT(failed_count == 1);
- ret = afr_fill_ta_loc(this, &loc);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
- "Failed to populate thin-arbiter loc for: %s.", loc.name);
- goto out;
- }
-
- xattr = dict_new();
- if (!xattr) {
- ret = -ENOMEM;
- goto out;
- }
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_static_bin(xattr, priv->pending_key[i],
- local->pending[i],
- AFR_NUM_CHANGE_LOGS * sizeof(int));
- if (ret)
- goto out;
- }
-
- flock.l_type = F_WRLCK;
- flock.l_start = 0;
- flock.l_len = 0;
-
- /*TODO: Convert to two domain locking. */
- ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
- AFR_TA_DOM_NOTIFY, &loc, F_SETLKW, &flock, NULL, NULL);
- if (ret)
- goto out;
-
- ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
- GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL);
-
- if (ret == -EINVAL) {
- gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_THIN_ARB,
- "Thin-arbiter has denied post-op on %s for gfid %s.",
- priv->pending_key[THIN_ARBITER_BRICK_INDEX],
- uuid_utoa(local->inode->gfid));
-
- } else if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
- "Post-op on thin-arbiter id file %s failed for gfid %s.",
- priv->pending_key[THIN_ARBITER_BRICK_INDEX],
- uuid_utoa(local->inode->gfid));
- }
- flock.l_type = F_UNLCK;
- syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], AFR_TA_DOM_NOTIFY,
- &loc, F_SETLK, &flock, NULL, NULL);
-out:
- if (xattr)
- dict_unref(xattr);
-
- return ret;
-}
-
static int
afr_ta_post_op_done(int ret, call_frame_t *frame, void *opaque)
{
@@ -1202,6 +1083,7 @@ afr_set_changelog_xattr(afr_private_t *priv, unsigned char *pending,
{
int **changelog = NULL;
int idx = 0;
+ int ret = 0;
int i;
if (local->is_new_entry == _gf_true) {
@@ -1217,7 +1099,11 @@ afr_set_changelog_xattr(afr_private_t *priv, unsigned char *pending,
if (local->transaction.failed_subvols[i])
changelog[i][idx] = hton32(1);
}
- afr_set_pending_dict(priv, xattr, changelog);
+ ret = afr_set_pending_dict(priv, xattr, changelog);
+ if (ret < 0) {
+ afr_matrix_cleanup(changelog, priv->child_count);
+ return NULL;
+ }
}
out:
@@ -1259,9 +1145,9 @@ afr_ta_post_op_do(void *opaque)
this = local->transaction.frame->this;
priv = this->private;
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate loc for thin-arbiter.");
goto out;
}
@@ -1285,8 +1171,10 @@ afr_ta_post_op_do(void *opaque)
changelog = afr_set_changelog_xattr(priv, pending, xattr, local);
- if (!changelog)
+ if (!changelog) {
+ ret = -ENOMEM;
goto out;
+ }
ret = afr_ta_post_op_lock(this, &loc);
if (ret)
@@ -2459,8 +2347,13 @@ afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this,
goto out;
}
- if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP)) {
- /*Only allow writes but shard does [f]xattrops on writes, so
+ if (local->transaction.disable_delayed_post_op) {
+ goto out;
+ }
+
+ if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) &&
+ (local->op != GF_FOP_FSYNC)) {
+        /* Only allow writes/fsyncs, but shard does [f]xattrops on writes, so
* they are fine too*/
goto out;
}
@@ -2587,8 +2480,10 @@ afr_changelog_fsync(call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
xdata = dict_new();
- if (xdata)
+ if (xdata) {
ret = dict_set_int32_sizen(xdata, "batch-fsync", 1);
+ ret = dict_set_str(xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ }
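Note that both dict sets above funnel through a single ret, so a failure of the batch-fsync set is silently overwritten by the second call; a more defensive variant (illustrative only, not what this patch does) would check each call separately:

    if (xdata) {
        if (dict_set_int32_sizen(xdata, "batch-fsync", 1))
            gf_msg_debug(this->name, 0, "failed to set batch-fsync key");
        if (dict_set_str(xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes"))
            gf_msg_debug(this->name, 0, "failed to mark fsync as internal");
    }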
for (i = 0; i < priv->child_count; i++) {
if (!local->transaction.pre_op[i])
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 0b27f507843..df7366f0a65 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -135,6 +135,27 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo)
}
}
+void
+afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options)
+{
+ char *volfile_id_str = NULL;
+ uuid_t anon_inode_gfid = {0};
+
+    /* If the volume id is not present, don't enable anything */
+ if (dict_get_str(options, "volume-id", &volfile_id_str))
+ return;
+ GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX);
+ /*anon_inode_name is not supposed to change once assigned*/
+ if (!priv->anon_inode_name[0]) {
+ snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s",
+ AFR_ANON_DIR_PREFIX, volfile_id_str);
+ gf_uuid_parse(volfile_id_str, anon_inode_gfid);
+        /* Flip a bit to make sure the volfile-id and anon-gfid are not the same */
+ anon_inode_gfid[0] ^= 1;
+ uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str);
+ }
+}
+
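Every client derives the same anonymous-inode directory name and gfid purely from the volume id, so no cross-client coordination is needed; condensed, the derivation is (volfile_id_str being the "volume-id" option value):

    uuid_t anon_inode_gfid = {0};

    /* name becomes ".glusterfs-anonymous-inode-<volume-id>" */
    snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s",
             AFR_ANON_DIR_PREFIX, volfile_id_str);
    gf_uuid_parse(volfile_id_str, anon_inode_gfid);
    anon_inode_gfid[0] ^= 1; /* one flipped bit: can never equal the volume id */
    uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str);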
int
reconfigure(xlator_t *this, dict_t *options)
{
@@ -168,7 +189,8 @@ reconfigure(xlator_t *this, dict_t *options)
bool, out);
GF_OPTION_RECONF("data-self-heal", data_self_heal, options, str, out);
- gf_string2boolean(data_self_heal, &priv->data_self_heal);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
GF_OPTION_RECONF("entry-self-heal", priv->entry_self_heal, options, bool,
out);
@@ -241,6 +263,8 @@ reconfigure(xlator_t *this, dict_t *options)
out);
GF_OPTION_RECONF("eager-lock", priv->eager_lock, options, bool, out);
+ GF_OPTION_RECONF("optimistic-change-log", priv->optimistic_change_log,
+ options, bool, out);
GF_OPTION_RECONF("quorum-type", qtype, options, str, out);
GF_OPTION_RECONF("quorum-count", priv->quorum_count, options, uint32, out);
fix_quorum_options(this, priv, qtype, options);
@@ -287,6 +311,10 @@ reconfigure(xlator_t *this, dict_t *options)
consistent_io = _gf_false;
priv->consistent_io = consistent_io;
+ afr_handle_anon_inode_options(priv, options);
+
+ GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool,
+ out);
if (priv->shd.enabled) {
if ((priv->shd.enabled != enabled_old) ||
(timeout_old != priv->shd.timeout))
@@ -417,6 +445,8 @@ init(xlator_t *this)
goto out;
priv = this->private;
+ INIT_LIST_HEAD(&priv->saved_locks);
+ INIT_LIST_HEAD(&priv->lk_healq);
LOCK_INIT(&priv->lock);
child_count = xlator_subvolume_count(this);
@@ -481,7 +511,8 @@ init(xlator_t *this)
GF_OPTION_INIT("heal-wait-queue-length", priv->heal_wait_qlen, uint32, out);
GF_OPTION_INIT("data-self-heal", data_self_heal, str, out);
- gf_string2boolean(data_self_heal, &priv->data_self_heal);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
GF_OPTION_INIT("data-self-heal-algorithm", data_self_heal_algorithm, str,
out);
@@ -535,7 +566,9 @@ init(xlator_t *this)
GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out);
GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out);
+ afr_handle_anon_inode_options(priv, this->options);
+ GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out);
if (priv->quorum_count != 0)
priv->consistent_io = _gf_false;
@@ -547,13 +580,19 @@ init(xlator_t *this)
goto out;
}
+ priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
+
priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count,
gf_afr_mt_char);
priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count,
gf_afr_mt_child_latency_t);
+ priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
- if (!priv->child_up || !priv->child_latency) {
+ if (!priv->child_up || !priv->child_latency || !priv->halo_child_up ||
+ !priv->anon_inode) {
ret = -ENOMEM;
goto out;
}
@@ -621,19 +660,83 @@ init(xlator_t *this)
out:
return ret;
}
+void
+afr_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+ int ret = -1;
+
+ if (!healer)
+ return;
+
+ if (healer->running) {
+ /*
+         * If there are any resources to clean up, we need
+         * to do that gracefully using pthread_cleanup_push
+ */
+ ret = gf_thread_cleanup_xint(healer->thread);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SELF_HEAL_FAILED,
+ "Failed to clean up healer threads.");
+ healer->thread = 0;
+ }
+ pthread_cond_destroy(&healer->cond);
+ pthread_mutex_destroy(&healer->mutex);
+}
+
+void
+afr_selfheal_daemon_fini(xlator_t *this)
+{
+ struct subvol_healer *healer = NULL;
+ afr_self_heald_t *shd = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ shd = &priv->shd;
+ if (!shd->iamshd)
+ return;
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->index_healers[i];
+ afr_destroy_healer_object(this, healer);
+
+ healer = &shd->full_healers[i];
+ afr_destroy_healer_object(this, healer);
+
+ if (shd->statistics[i])
+ eh_destroy(shd->statistics[i]);
+ }
+ GF_FREE(shd->index_healers);
+ GF_FREE(shd->full_healers);
+ GF_FREE(shd->statistics);
+ if (shd->split_brain)
+ eh_destroy(shd->split_brain);
+}
void
fini(xlator_t *this)
{
afr_private_t *priv = NULL;
priv = this->private;
+
+ afr_selfheal_daemon_fini(this);
+ GF_ASSERT(list_empty(&priv->saved_locks));
+
LOCK(&priv->lock);
if (priv->timer != NULL) {
gf_timer_call_cancel(this->ctx, priv->timer);
priv->timer = NULL;
}
UNLOCK(&priv->lock);
+
+ if (this->local_pool != NULL) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
+
this->private = NULL;
afr_priv_destroy(priv);
if (this->itable) {
@@ -665,6 +768,7 @@ struct xlator_fops fops = {
.getxattr = afr_getxattr,
.fgetxattr = afr_fgetxattr,
.readv = afr_readv,
+ .seek = afr_seek,
/* inode write */
.writev = afr_writev,
@@ -737,7 +841,7 @@ struct volume_options options[] = {
{.key = {"read-hash-mode"},
.type = GF_OPTION_TYPE_INT,
.min = 0,
- .max = 3,
+ .max = 5,
.default_value = "1",
.op_version = {2},
.flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
@@ -750,7 +854,10 @@ struct volume_options options[] = {
"1 = hash by GFID of file (all clients use "
"same subvolume).\n"
"2 = hash by GFID of file and client PID.\n"
- "3 = brick having the least outstanding read requests."},
+ "3 = brick having the least outstanding read requests.\n"
+ "4 = brick having the least network ping latency.\n"
+ "5 = Hybrid mode between 3 and 4, ie least value among "
+ "network-latency multiplied by outstanding-read-requests."},
{
.key = {"choose-local"},
.type = GF_OPTION_TYPE_BOOL,
@@ -1210,6 +1317,14 @@ struct volume_options options[] = {
.tags = {"replicate"},
.description = "This option exists only for backward compatibility "
"and configuring it doesn't have any effect"},
+ {.key = {"use-anonymous-inode"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_8_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "Setting this option heals directory renames efficiently"},
+
{.key = {NULL}},
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 600c7551801..d62f9a9caf2 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -39,7 +39,10 @@
#define AFR_TA_DOM_NOTIFY "afr.ta.dom-notify"
#define AFR_TA_DOM_MODIFY "afr.ta.dom-modify"
+#define AFR_LK_HEAL_DOM "afr.lock-heal.domain"
+
#define AFR_HALO_MAX_LATENCY 99999
+#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode"
#define PFLAG_PENDING (1 << 0)
#define PFLAG_SBRAIN (1 << 1)
@@ -95,6 +98,25 @@ typedef int (*afr_changelog_resume_t)(call_frame_t *frame, xlator_t *this);
gf_fop_list[local->op], uuid_utoa(local->inode->gfid)); \
} while (0)
+#define AFR_ERROR_OUT_IF_FDCTX_INVALID(__fd, __this, __error, __label) \
+ do { \
+ afr_fd_ctx_t *__fd_ctx = NULL; \
+ __fd_ctx = afr_fd_ctx_get(__fd, __this); \
+ if (__fd_ctx && __fd_ctx->is_fd_bad) { \
+ __error = EBADF; \
+ goto __label; \
+ } \
+ } while (0)
+
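The macro is intended to sit at the top of fd-based fops so that a fd marked bad by lock healing fails fast with EBADF instead of being wound to the children. A hypothetical call site (function name and labels are illustrative, not from this patch):

    int
    afr_fsync_sketch(call_frame_t *frame, xlator_t *this, fd_t *fd)
    {
        int32_t op_errno = 0;

        AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
        /* ... normal wind path to the children ... */
        return 0;
    out:
        AFR_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL);
        return 0;
    }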
+typedef enum {
+ AFR_READ_POLICY_FIRST_UP,
+ AFR_READ_POLICY_GFID_HASH,
+ AFR_READ_POLICY_GFID_PID_HASH,
+ AFR_READ_POLICY_LESS_LOAD,
+ AFR_READ_POLICY_LEAST_LATENCY,
+ AFR_READ_POLICY_LOAD_LATENCY_HYBRID,
+} afr_read_hash_mode_t;
+
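The enum order lines up with the 0-5 values documented for the read-hash-mode option later in this patch; an illustrative mapping from option value to policy (not the exact source):

    switch (priv->hash_mode) {
        case AFR_READ_POLICY_GFID_PID_HASH:       /* read-hash-mode=2 */
            /* hash the GFID together with the client PID */
            break;
        case AFR_READ_POLICY_LOAD_LATENCY_HYBRID: /* read-hash-mode=5 */
            /* least (latency * pending reads) among up children */
            break;
        default:
            break;
    }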
typedef enum {
AFR_FAV_CHILD_NONE,
AFR_FAV_CHILD_BY_SIZE,
@@ -130,10 +152,23 @@ typedef enum {
} afr_ta_fop_state_t;
struct afr_nfsd {
- gf_boolean_t iamnfsd;
uint32_t halo_max_latency_msec;
+ gf_boolean_t iamnfsd;
};
+typedef struct _afr_lk_heal_info {
+ fd_t *fd;
+ int32_t cmd;
+ struct gf_flock flock;
+ dict_t *xdata_req;
+ unsigned char *locked_nodes;
+ struct list_head pos;
+ gf_lkowner_t lk_owner;
+ pid_t pid;
+ int32_t *child_up_event_gen;
+ int32_t *child_down_event_gen;
+} afr_lk_heal_info_t;
+
typedef struct _afr_private {
gf_lock_t lock; /* to guard access to child_count, etc */
unsigned int child_count; /* total number of children */
@@ -144,20 +179,21 @@ typedef struct _afr_private {
inode_t *root_inode;
+ int favorite_child; /* subvolume to be preferred in resolving
+ split-brain cases */
/* For thin-arbiter. */
- unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/
uuid_t ta_gfid;
- unsigned char ta_child_up;
+ unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/
int ta_bad_child_index;
int ta_event_gen;
- off_t ta_notify_dom_lock_offset;
- gf_boolean_t release_ta_notify_dom_lock;
unsigned int ta_in_mem_txn_count;
unsigned int ta_on_wire_txn_count;
struct list_head ta_waitq;
struct list_head ta_onwireq;
+ unsigned char *anon_inode;
unsigned char *child_up;
+ unsigned char *halo_child_up;
int64_t *child_latency;
unsigned char *local;
@@ -178,30 +214,31 @@ typedef struct _afr_private {
int32_t healers; /* No. of elements currently undergoing background
heal*/
+ gf_boolean_t release_ta_notify_dom_lock;
+
gf_boolean_t metadata_self_heal; /* on/off */
gf_boolean_t entry_self_heal; /* on/off */
gf_boolean_t metadata_splitbrain_forced_heal; /* on/off */
int read_child; /* read-subvolume */
- unsigned int hash_mode; /* for when read_child is not set */
gf_atomic_t *pending_reads; /*No. of pending read cbks per child.*/
- int favorite_child; /* subvolume to be preferred in resolving
- split-brain cases */
- afr_favorite_child_policy fav_child_policy; /*Policy to use for automatic
- resolution of split-brains.*/
+ gf_timer_t *timer; /* launched when parent up is received */
unsigned int wait_count; /* # of servers to wait for success */
- gf_timer_t *timer; /* launched when parent up is received */
-
+ unsigned char ta_child_up;
gf_boolean_t optimistic_change_log;
gf_boolean_t eager_lock;
gf_boolean_t pre_op_compat; /* on/off */
uint32_t post_op_delay_secs;
unsigned int quorum_count;
- char vol_uuid[UUID_SIZE + 1];
+ off_t ta_notify_dom_lock_offset;
+ afr_favorite_child_policy fav_child_policy; /*Policy to use for automatic
+ resolution of split-brains.*/
+ afr_read_hash_mode_t hash_mode; /* for when read_child is not set */
+
int32_t *last_event;
/* @event_generation: Keeps count of number of events received which can
@@ -214,34 +251,41 @@ typedef struct _afr_private {
important as we might have had a network split brain.
*/
uint32_t event_generation;
+ char vol_uuid[UUID_SIZE + 1];
gf_boolean_t choose_local;
gf_boolean_t did_discovery;
- uint64_t sh_readdir_size;
gf_boolean_t ensure_durability;
+ gf_boolean_t halo_enabled;
+ gf_boolean_t consistent_metadata;
+ gf_boolean_t need_heal;
+ gf_boolean_t granular_locks;
+ uint64_t sh_readdir_size;
char *sh_domain;
char *afr_dirty;
- gf_boolean_t halo_enabled;
- uint32_t halo_max_latency_msec;
- uint32_t halo_max_replicas;
- uint32_t halo_min_replicas;
+ uint64_t spb_choice_timeout;
afr_self_heald_t shd;
struct afr_nfsd nfsd;
- gf_boolean_t consistent_metadata;
- uint64_t spb_choice_timeout;
- gf_boolean_t need_heal;
+ uint32_t halo_max_latency_msec;
+ uint32_t halo_max_replicas;
+ uint32_t halo_min_replicas;
- /* pump dependencies */
- void *pump_private;
- gf_boolean_t use_afr_in_pump;
- gf_boolean_t granular_locks;
gf_boolean_t full_lock;
gf_boolean_t esh_granular;
gf_boolean_t consistent_io;
gf_boolean_t data_self_heal; /* on/off */
+ gf_boolean_t use_anon_inode;
+
+ /*For lock healing.*/
+ struct list_head saved_locks;
+ struct list_head lk_healq;
+
+ /*For anon-inode handling */
+ char anon_inode_name[NAME_MAX + 1];
+ char anon_gfid_str[UUID_SIZE + 1];
} afr_private_t;
typedef enum {
@@ -305,18 +349,17 @@ afr_entry_lockee_cmp(const void *l1, const void *l2);
typedef struct {
loc_t *lk_loc;
- int lockee_count;
afr_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
const char *lk_basename;
const char *lower_basename;
const char *higher_basename;
- char lower_locked;
- char higher_locked;
unsigned char *lower_locked_nodes;
- int32_t lock_count;
+ afr_lock_cbk_t lock_cbk;
+
+ int lockee_count;
int32_t lk_call_count;
int32_t lk_expected_count;
@@ -324,14 +367,15 @@ typedef struct {
int32_t lock_op_ret;
int32_t lock_op_errno;
- afr_lock_cbk_t lock_cbk;
char *domain; /* Domain on which inode/entry lock/unlock in progress.*/
+ int32_t lock_count;
+ char lower_locked;
+ char higher_locked;
} afr_internal_lock_t;
struct afr_reply {
int valid;
int32_t op_ret;
- int32_t op_errno;
dict_t *xattr; /*For xattrop*/
dict_t *xdata;
struct iatt poststat;
@@ -340,6 +384,7 @@ struct afr_reply {
struct iatt preparent;
struct iatt preparent2;
struct iatt postparent2;
+ int32_t op_errno;
/* For rchecksum */
uint8_t checksum[SHA256_DIGEST_LENGTH];
gf_boolean_t buf_has_zeroes;
@@ -363,6 +408,10 @@ typedef struct {
arrives, we continue to read off this subvol.
*/
int readdir_subvol;
+ /* lock-healing related members. */
+ gf_boolean_t is_fd_bad;
+ afr_lk_heal_info_t *lk_heal_info;
+
} afr_fd_ctx_t;
typedef enum {
@@ -379,8 +428,6 @@ typedef struct _afr_inode_lock_t {
*/
int32_t num_inodelks;
unsigned int event_generation;
- gf_boolean_t release;
- gf_boolean_t acquired;
gf_timer_t *delay_timer;
struct list_head owners; /*Transactions that are performing fop*/
struct list_head post_op; /*Transactions that are done with the fop
@@ -389,6 +436,8 @@ typedef struct _afr_inode_lock_t {
*conflicting transactions to complete*/
struct list_head frozen; /*Transactions that need to go as part of
* next batch of eager-lock*/
+ gf_boolean_t release;
+ gf_boolean_t acquired;
} afr_lock_t;
typedef struct _afr_inode_ctx {
@@ -397,15 +446,11 @@ typedef struct _afr_inode_ctx {
int lock_count;
int spb_choice;
gf_timer_t *timer;
- gf_boolean_t need_refresh;
unsigned int *pre_op_done[AFR_NUM_CHANGE_LOGS];
int inherited[AFR_NUM_CHANGE_LOGS];
int on_disk[AFR_NUM_CHANGE_LOGS];
-
- /* set if any write on this fd was a non stable write
- (i.e, without O_SYNC or O_DSYNC)
- */
- gf_boolean_t witnessed_unstable_write;
+    /* Only 2 types of transactions support eager-locks now: DATA/METADATA */
+ afr_lock_t lock[2];
/* @open_fd_count:
Number of open FDs queried from the server, as queried through
@@ -413,8 +458,12 @@ typedef struct _afr_inode_ctx {
temporarily disabled.
*/
uint32_t open_fd_count;
- /*Only 2 types of transactions support eager-locks now. DATA/METADATA*/
- afr_lock_t lock[2];
+ gf_boolean_t need_refresh;
+
+    /* set if any write on this fd was a non-stable write
+       (i.e., without O_SYNC or O_DSYNC)
+ */
+ gf_boolean_t witnessed_unstable_write;
} afr_inode_ctx_t;
typedef struct _afr_local {
@@ -428,19 +477,15 @@ typedef struct _afr_local {
unsigned int event_generation;
uint32_t open_fd_count;
- gf_boolean_t update_open_fd_count;
int32_t num_inodelks;
- gf_boolean_t update_num_inodelks;
-
- gf_lkowner_t saved_lk_owner;
int32_t op_ret;
int32_t op_errno;
- int32_t **pending;
-
int dirty[AFR_NUM_CHANGE_LOGS];
+ int32_t **pending;
+
loc_t loc;
loc_t newloc;
@@ -471,14 +516,6 @@ typedef struct _afr_local {
afr_read_txn_wind_t readfn;
- /* @refreshed:
-
- the inode was "refreshed" (i.e, pending xattrs from all subvols
- freshly inspected and inode ctx updated accordingly) as part of
- this transaction already.
- */
- gf_boolean_t refreshed;
-
/* @inode:
the inode on which the read txn is performed on. ref'ed and copied
@@ -503,8 +540,6 @@ typedef struct _afr_local {
unsigned char *readable;
unsigned char *readable2; /*For rename transaction*/
- int read_subvol; /* Current read subvolume */
-
afr_inode_refresh_cbk_t refreshfn;
/* @refreshinode:
@@ -513,9 +548,30 @@ typedef struct _afr_local {
*/
inode_t *refreshinode;
+ dict_t *xattr_req;
+
+ dict_t *dict;
+
+ int read_subvol; /* Current read subvolume */
+
+ int optimistic_change_log;
+
+ afr_internal_lock_t internal_lock;
+
/*To handle setattr/setxattr on yet to be linked inode from dht*/
uuid_t refreshgfid;
+ /* @refreshed:
+
+ the inode was "refreshed" (i.e, pending xattrs from all subvols
+ freshly inspected and inode ctx updated accordingly) as part of
+ this transaction already.
+ */
+ gf_boolean_t refreshed;
+
+ gf_boolean_t update_num_inodelks;
+ gf_boolean_t update_open_fd_count;
+
/*
@pre_op_compat:
@@ -525,14 +581,6 @@ typedef struct _afr_local {
gf_boolean_t pre_op_compat;
- dict_t *xattr_req;
-
- afr_internal_lock_t internal_lock;
-
- dict_t *dict;
-
- int optimistic_change_log;
-
/* Is the current writev() going to perform a stable write?
i.e, is fd->flags or @flags writev param have O_SYNC or
O_DSYNC?
@@ -551,25 +599,25 @@ typedef struct _afr_local {
struct {
struct {
- gf_boolean_t needs_fresh_lookup;
- uuid_t gfid_req;
- } lookup;
-
- struct {
- unsigned char buf_set;
struct statvfs buf;
+ unsigned char buf_set;
} statfs;
struct {
- int32_t flags;
fd_t *fd;
+ int32_t flags;
} open;
struct {
- int32_t cmd;
struct gf_flock user_flock;
struct gf_flock ret_flock;
unsigned char *locked_nodes;
+ int32_t cmd;
+ /*For lock healing only.*/
+ unsigned char *dom_locked_nodes;
+ int32_t *dom_lock_op_ret;
+ int32_t *dom_lock_op_errno;
+ struct gf_flock *getlk_rsp;
} lk;
/* inode read */
@@ -594,8 +642,8 @@ typedef struct _afr_local {
struct {
char *name;
- int last_index;
long xattr_len;
+ int last_index;
} getxattr;
struct {
@@ -608,11 +656,10 @@ typedef struct _afr_local {
/* dir read */
struct {
+ uint32_t *checksum;
int success_count;
int32_t op_ret;
int32_t op_errno;
-
- uint32_t *checksum;
} opendir;
struct {
@@ -621,8 +668,8 @@ typedef struct _afr_local {
size_t size;
off_t offset;
dict_t *dict;
- gf_boolean_t failed;
int last_index;
+ gf_boolean_t failed;
} readdir;
/* inode write */
@@ -632,12 +679,11 @@ typedef struct _afr_local {
} inode_wfop; // common structure for all inode-write-fops
struct {
- int32_t op_ret;
-
struct iovec *vector;
struct iobref *iobref;
- int32_t count;
off_t offset;
+ int32_t op_ret;
+ int32_t count;
uint32_t flags;
} writev;
@@ -697,29 +743,25 @@ typedef struct _afr_local {
} create;
struct {
+ dict_t *params;
dev_t dev;
mode_t mode;
- dict_t *params;
} mknod;
struct {
- int32_t mode;
dict_t *params;
+ int32_t mode;
} mkdir;
struct {
- int flags;
- } rmdir;
-
- struct {
dict_t *params;
char *linkpath;
} symlink;
struct {
- int32_t mode;
off_t offset;
size_t len;
+ int32_t mode;
} fallocate;
struct {
@@ -746,10 +788,10 @@ typedef struct _afr_local {
struct {
char *volume;
char *basename;
+ void *xdata;
entrylk_cmd in_cmd;
entrylk_cmd cmd;
entrylk_type type;
- void *xdata;
} entrylk;
struct {
@@ -758,31 +800,33 @@ typedef struct _afr_local {
} seek;
struct {
- int32_t datasync;
- } fsync;
-
- struct {
struct gf_lease user_lease;
struct gf_lease ret_lease;
unsigned char *locked_nodes;
} lease;
- } cont;
+ struct {
+ int flags;
+ } rmdir;
- struct {
- off_t start, len;
+ struct {
+ int32_t datasync;
+ } fsync;
- gf_boolean_t eager_lock_on;
- gf_boolean_t do_eager_unlock;
+ struct {
+ uuid_t gfid_req;
+ gf_boolean_t needs_fresh_lookup;
+ } lookup;
+ } cont;
+
+ struct {
char *basename;
char *new_basename;
loc_t parent_loc;
loc_t new_parent_loc;
- afr_transaction_type type;
-
/* stub to resume on destruction
of the transaction frame */
call_stub_t *resume_stub;
@@ -800,6 +844,30 @@ typedef struct _afr_local {
FOP failed. */
unsigned char *failed_subvols;
+ call_frame_t *main_frame; /*Fop frame*/
+ call_frame_t *frame; /*Transaction frame*/
+
+ int (*wind)(call_frame_t *frame, xlator_t *this, int subvol);
+
+ int (*unwind)(call_frame_t *frame, xlator_t *this);
+
+ off_t start, len;
+
+ afr_transaction_type type;
+
+ int32_t in_flight_sb_errno; /* This is where the cause of the
+ failure on the last good copy of
+ the file is stored.
+ */
+
+    /* @changelog_resume: function to be called after changelogging
+ (either pre-op or post-op) is done
+ */
+ afr_changelog_resume_t changelog_resume;
+
+ gf_boolean_t eager_lock_on;
+ gf_boolean_t do_eager_unlock;
+
/* @dirtied: flag which indicates whether we set dirty flag
in the OP. Typically true when we are performing operation
on more than one subvol and optimistic changelog is disabled
@@ -824,6 +892,10 @@ typedef struct _afr_local {
*/
gf_boolean_t no_uninherit;
+ gf_boolean_t in_flight_sb; /* Indicator for occurrence of
+ split-brain while in the middle of
+ a txn. */
+
/* @uninherit_done:
@uninherit_value:
@@ -836,27 +908,7 @@ typedef struct _afr_local {
gf_boolean_t uninherit_done;
gf_boolean_t uninherit_value;
- gf_boolean_t in_flight_sb; /* Indicator for occurrence of
- split-brain while in the middle of
- a txn. */
- int32_t in_flight_sb_errno; /* This is where the cause of the
- failure on the last good copy of
- the file is stored.
- */
-
- /* @changelog_resume: function to be called after changlogging
- (either pre-op or post-op) is done
- */
- afr_changelog_resume_t changelog_resume;
-
- call_frame_t *main_frame; /*Fop frame*/
- call_frame_t *frame; /*Transaction frame*/
-
- int (*wind)(call_frame_t *frame, xlator_t *this, int subvol);
-
- int (*unwind)(call_frame_t *frame, xlator_t *this);
-
- /* post-op hook */
+ gf_boolean_t disable_delayed_post_op;
} transaction;
syncbarrier_t barrier;
@@ -869,36 +921,36 @@ typedef struct _afr_local {
mode_t umask;
int xflag;
- gf_boolean_t do_discovery;
struct afr_reply *replies;
/* For client side background heals. */
struct list_head healer;
call_frame_t *heal_frame;
- gf_boolean_t need_full_crawl;
- afr_fop_lock_state_t fop_lock_state;
-
- gf_boolean_t is_read_txn;
afr_inode_ctx_t *inode_ctx;
/*For thin-arbiter transactions.*/
- unsigned char read_txn_query_child;
- unsigned char ta_child_up;
+ int ta_failed_subvol;
+ int ta_event_gen;
struct list_head ta_waitq;
struct list_head ta_onwireq;
afr_ta_fop_state_t fop_state;
- int ta_failed_subvol;
- int ta_event_gen;
+ afr_fop_lock_state_t fop_lock_state;
+ gf_lkowner_t saved_lk_owner;
+ unsigned char read_txn_query_child;
+ unsigned char ta_child_up;
+ gf_boolean_t do_discovery;
+ gf_boolean_t need_full_crawl;
+ gf_boolean_t is_read_txn;
gf_boolean_t is_new_entry;
} afr_local_t;
typedef struct afr_spbc_timeout {
call_frame_t *frame;
- gf_boolean_t d_spb;
- gf_boolean_t m_spb;
loc_t *loc;
int spb_child_index;
+ gf_boolean_t d_spb;
+ gf_boolean_t m_spb;
} afr_spbc_timeout_t;
typedef struct afr_spb_status {
@@ -908,9 +960,9 @@ typedef struct afr_spb_status {
typedef struct afr_empty_brick_args {
call_frame_t *frame;
+ char *op_type;
loc_t loc;
int empty_index;
- char *op_type;
} afr_empty_brick_args_t;
typedef struct afr_read_subvol_args {
@@ -952,7 +1004,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
int event_generation);
int
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this);
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
+
+int
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
int
afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
@@ -1068,6 +1123,9 @@ afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd);
if (__local && __local->is_read_txn) \
afr_pending_read_decrement(__this->private, \
__local->read_subvol); \
+ if (__local && __local->xdata_req && \
+ afr_is_lock_mode_mandatory(__local->xdata_req)) \
+ afr_dom_lock_release(frame); \
frame->local = NULL; \
} \
\
@@ -1220,8 +1278,8 @@ int
afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this,
int spb_choice);
int
-afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
- int *spb_choice);
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol);
int
afr_get_child_index_from_name(xlator_t *this, char *name);
@@ -1306,7 +1364,7 @@ int
afr_set_inode_local(xlator_t *this, afr_local_t *local, inode_t *inode);
int
-afr_fill_ta_loc(xlator_t *this, loc_t *loc);
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop);
int
afr_ta_post_op_lock(xlator_t *this, loc_t *loc);
@@ -1333,12 +1391,33 @@ void
afr_ta_locked_priv_invalidate(afr_private_t *priv);
gf_boolean_t
-afr_lookup_has_quorum(call_frame_t *frame, xlator_t *this,
- unsigned char *subvols);
+afr_lookup_has_quorum(call_frame_t *frame,
+ const unsigned int up_children_count);
void
afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this);
void
+afr_handle_replies_quorum(call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv,
+ int child);
+
+void
afr_selfheal_childup(xlator_t *this, afr_private_t *priv);
+
+gf_boolean_t
+afr_is_lock_mode_mandatory(dict_t *xdata);
+
+void
+afr_dom_lock_release(call_frame_t *frame);
+
+void
+afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies);
+
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid);
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index f9e7141604a..8ba0cc4c732 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -10,48 +10,52 @@
/* TODO: add NS locking */
-#include <glusterfs/glusterfs.h>
-#include <glusterfs/xlator.h>
#include "libxlator.h"
#include "dht-common.h"
#include "dht-lock.h"
-#include <glusterfs/defaults.h>
#include <glusterfs/byte-order.h>
#include <glusterfs/quota-common-utils.h>
#include <glusterfs/upcall-utils.h>
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
+#include <glusterfs/common-utils.h>
#include <sys/time.h>
#include <libgen.h>
#include <signal.h>
-int run_defrag = 0;
-
-int
-dht_link2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata);
-int
-dht_removexattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame,
- int ret);
+static int
+dht_link2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
-dht_setxattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req);
-int
-dht_rmdir_readdirp_do(call_frame_t *readdirp_frame, xlator_t *this);
+static int
+dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this);
-int
-dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata);
+static int
+dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata);
-int
-dht_set_file_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req);
+static int
+dht_rmdir_unlock(call_frame_t *frame, xlator_t *this);
-int
-dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req);
+static const char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
-int
-dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc);
+/* Check the xdata to make sure EBADF has been set by the client xlator */
+int32_t
+dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno)
+{
+ if (op_ret == -1 && (op_errno == EBADF || op_errno == EBADFD) &&
+ !(local->fd_checked)) {
+ return 1;
+ }
+ return 0;
+}
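Callbacks use this helper to detect a stale remote fd and retry exactly once, with local->fd_checked guarding against a loop. The expected caller pattern, assuming the pre-existing dht_check_and_open_fd_on_subvol() helper from dht-helper.c:

    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
        ret = dht_check_and_open_fd_on_subvol(this, frame);
        if (ret)
            goto out;
        return 0; /* the fop is re-wound once the fd is reopened */
    }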
/* Sets the blocks and size values to fixed values. This is to be called
* only for dirs. The caller is responsible for checking the type
@@ -67,68 +71,17 @@ dht_set_fixed_dir_stat(struct iatt *stat)
return -1;
}
-/* Set both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
- * Use DHT_MODE_IN_XDATA_KEY if available. Else fall back to
- * DHT_IATT_IN_XDATA_KEY
- */
-int
-dht_request_iatt_in_xdata(xlator_t *this, dict_t *xattr_req)
-{
- int ret = -1;
-
- ret = dict_set_int8(xattr_req, DHT_MODE_IN_XDATA_KEY, 1);
- ret = dict_set_int8(xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
-
- /* At least one call succeeded */
- return ret;
-}
-
-/* Get both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
- * Use DHT_MODE_IN_XDATA_KEY if available, else fall back to
- * DHT_IATT_IN_XDATA_KEY
- * This will return a dummy iatt with only the mode and type set
- */
-int
-dht_read_iatt_from_xdata(xlator_t *this, dict_t *xdata, struct iatt *stbuf)
-{
- int ret = -1;
- int32_t mode = 0;
-
- ret = dict_get_int32(xdata, DHT_MODE_IN_XDATA_KEY, &mode);
-
- if (ret) {
- ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
- } else {
- stbuf->ia_prot = ia_prot_from_st_mode(mode);
- stbuf->ia_type = ia_type_from_st_mode(mode);
- }
-
- return ret;
-}
-
-int
-dht_rmdir_unlock(call_frame_t *frame, xlator_t *this);
-
-char *xattrs_to_heal[] = {"user.",
- POSIX_ACL_ACCESS_XATTR,
- POSIX_ACL_DEFAULT_XATTR,
- QUOTA_LIMIT_KEY,
- QUOTA_LIMIT_OBJECTS_KEY,
- GF_SELINUX_XATTR_KEY,
- GF_XATTR_MDATA_KEY,
- NULL};
-
-char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
-
/* Return true if key exists in array
*/
static gf_boolean_t
dht_match_xattr(const char *key)
{
+ char **xattrs_to_heal = get_xattrs_to_heal();
+
return gf_get_index_by_elem(xattrs_to_heal, (char *)key) >= 0;
}
-int
+static int
dht_aggregate_quota_xattr(dict_t *dst, char *key, data_t *value)
{
int ret = -1;
@@ -191,7 +144,7 @@ out:
return ret;
}
-int
+static int
add_opt(char **optsp, const char *opt)
{
char *newopts = NULL;
@@ -269,7 +222,7 @@ out:
*/
-int
+static int
dht_aggregate_split_brain_xattr(dict_t *dst, char *key, data_t *value)
{
int ret = 0;
@@ -368,7 +321,7 @@ out:
return ret;
}
-int
+static int
dht_aggregate(dict_t *this, char *key, data_t *value, void *data)
{
dict_t *dst = NULL;
@@ -415,7 +368,7 @@ out:
return ret;
}
-void
+static void
dht_aggregate_xattr(dict_t *dst, dict_t *src)
{
if ((dst == NULL) || (src == NULL)) {
@@ -497,7 +450,7 @@ dht_inode_ctx_mdsvol_get(inode_t *inode, xlator_t *this, xlator_t **mdsvol)
- complete linkfile selfheal
*/
-int
+static int
dht_lookup_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -539,7 +492,7 @@ out:
return ret;
}
-int
+static int
dht_discover_complete(xlator_t *this, call_frame_t *discover_frame)
{
dht_local_t *local = NULL;
@@ -660,13 +613,14 @@ dht_discover_complete(xlator_t *this, call_frame_t *discover_frame)
if (local->need_xattr_heal && !heal_path) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret,
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
DHT_MSG_DIR_XATTR_HEAL_FAILED,
"xattr heal failed for "
"directory gfid is %s ",
gfid_local);
+ }
}
}
@@ -727,7 +681,7 @@ out:
return ret;
}
-int
+static int
dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -736,6 +690,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int ret = -1;
dht_conf_t *conf = 0;
dht_layout_t *layout = NULL;
+ int32_t mds_heal_fresh_lookup = 0;
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
@@ -743,6 +698,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
conf = this->private;
layout = local->selfheal.layout;
+ mds_heal_fresh_lookup = local->mds_heal_fresh_lookup;
if (op_ret) {
gf_msg_debug(this->name, op_ret,
@@ -763,11 +719,63 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
layout);
}
out:
- if (local && local->mds_heal_fresh_lookup)
+ if (mds_heal_fresh_lookup)
DHT_STACK_DESTROY(frame);
return 0;
}
+static xlator_t *
+dht_inode_get_hashed_subvol(inode_t *inode, xlator_t *this, loc_t *loc)
+{
+ char *path = NULL;
+ loc_t populate_loc = {
+ 0,
+ };
+ char *name = NULL;
+ xlator_t *hash_subvol = NULL;
+
+ if (!inode)
+ return hash_subvol;
+
+ if (loc && loc->parent && loc->path) {
+ if (!loc->name) {
+ name = strrchr(loc->path, '/');
+ if (name) {
+ loc->name = name + 1;
+ } else {
+ goto out;
+ }
+ }
+ hash_subvol = dht_subvol_get_hashed(this, loc);
+ goto out;
+ }
+
+ if (!gf_uuid_is_null(inode->gfid)) {
+ populate_loc.inode = inode_ref(inode);
+ populate_loc.parent = inode_parent(populate_loc.inode, NULL, NULL);
+ inode_path(populate_loc.inode, NULL, &path);
+
+ if (!path)
+ goto out;
+
+ populate_loc.path = path;
+ if (!populate_loc.name && populate_loc.path) {
+ name = strrchr(populate_loc.path, '/');
+ if (name) {
+ populate_loc.name = name + 1;
+
+ } else {
+ goto out;
+ }
+ }
+ hash_subvol = dht_subvol_get_hashed(this, &populate_loc);
+ }
+out:
+ if (populate_loc.inode)
+ loc_wipe(&populate_loc);
+ return hash_subvol;
+}
+
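The helper lets gfid-only (nameless) code paths still resolve a hashed subvol by rebuilding a path from the inode table; a hypothetical caller:

    xlator_t *hashed = dht_inode_get_hashed_subvol(inode, this, NULL);

    if (!hashed) {
        /* no name could be recovered; fall back to a lookup-everywhere */
    }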
/* Common function call by revalidate/selfheal code path to populate
internal xattr if it is not present, mark_during_fresh_lookup value
determines either function is call by revalidate_cbk(discover_complete)
@@ -853,7 +861,8 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst,
"Failed to get hashed subvol for path %s"
"gfid is %s ",
local->loc.path, gfid_local);
- (*errst) = 1;
+ if (errst)
+ (*errst) = 1;
ret = -1;
goto out;
}
@@ -924,7 +933,44 @@ out:
return ret;
}
-int
+/* Read the value of key from the dict byte-wise and save it in the array,
+   converting each element from network byte order to host byte order
+*/
+static int32_t
+dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size,
+ int *errst)
+{
+ void *ptr = NULL;
+ int32_t len = -1;
+ int32_t vindex = -1;
+ int32_t err = -1;
+ int ret = 0;
+
+ if (dict == NULL) {
+ (*errst) = -1;
+ return -EINVAL;
+ }
+ err = dict_get_ptr_and_len(dict, key, &ptr, &len);
+ if (err != 0) {
+ (*errst) = -1;
+ return err;
+ }
+
+ if (len != (size * sizeof(int32_t))) {
+ (*errst) = -1;
+ return -EINVAL;
+ }
+
+ for (vindex = 0; vindex < size; vindex++) {
+ value[vindex] = ntoh32(*((int32_t *)ptr + vindex));
+ if (value[vindex] < 0)
+ ret = -1;
+ }
+
+ return ret;
+}
+
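Now static, the helper's only use in this file reads the one-element mds xattr; the call pattern (as in dht_lookup_dir_cbk below, with conf and xattr coming from the enclosing callback):

    int32_t mds_xattr_val[1] = {0};
    int errst = 0;
    int32_t check_mds;

    check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key, mds_xattr_val,
                                   1, &errst);
    if ((check_mds < 0) && !errst) {
        /* xattr present with a non-zero value: xattrs still need healing */
    }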
+static int
dht_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, inode_t *inode, struct iatt *stbuf,
dict_t *xattr, struct iatt *postparent)
@@ -1019,7 +1065,7 @@ dht_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
if (local->xattr == NULL) {
local->xattr = dict_ref(xattr);
} else {
- /* Don't aggregate for files. See BZ#1484113 */
+ /* Don't aggregate for files. See BZ#1484709 */
if (is_dir)
dht_aggregate_xattr(local->xattr, xattr);
}
@@ -1085,7 +1131,53 @@ out:
return 0;
}
-int
+static int
+dht_set_file_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ int ret = -EINVAL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf) {
+ goto err;
+ }
+
+ if (!xattr_req) {
+ goto err;
+ }
+
+ /* Used to check whether this is a linkto file.
+ */
+ ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ conf->link_xattr_name, loc->path);
+ goto err;
+ }
+
+ /* This is used to make sure we don't unlink linkto files
+ * which are the target of an ongoing file migration.
+ */
+ ret = dict_set_uint32(xattr_req, GLUSTERFS_OPEN_FD_COUNT, 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ GLUSTERFS_OPEN_FD_COUNT, loc->path);
+ goto err;
+ }
+
+ ret = 0;
+err:
+ return ret;
+}
+
+/* This is a gfid-based nameless lookup. Without a name, the hashed subvol
+ * cannot be calculated, so a lookup is sent to all subvols.
+ */
+static int
dht_do_discover(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
int ret;
@@ -1099,6 +1191,9 @@ dht_do_discover(call_frame_t *frame, xlator_t *this, loc_t *loc)
conf = this->private;
local = frame->local;
+ /* As we do not know if this is a file or directory, request
+ * both file and directory xattrs
+ */
ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
if (ret) {
goto err;
@@ -1110,6 +1205,9 @@ dht_do_discover(call_frame_t *frame, xlator_t *this, loc_t *loc)
}
if (loc_is_root(loc)) {
+ /* Request the DHT commit hash xattr (trusted.glusterfs.dht.commithash)
+ * set on the brick root.
+ */
ret = dict_set_uint32(local->xattr_req, conf->commithash_xattr_name,
sizeof(uint32_t));
}
@@ -1151,48 +1249,11 @@ err:
return 0;
}
-/* Get the value of key from dict in the bytewise and save in array after
- convert from network byte order to host byte order
-*/
-int32_t
-dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size,
- int *errst)
-{
- void *ptr = NULL;
- int32_t len = -1;
- int32_t vindex = -1;
- int32_t err = -1;
- int ret = 0;
-
- if (dict == NULL) {
- (*errst) = -1;
- return -EINVAL;
- }
- err = dict_get_ptr_and_len(dict, key, &ptr, &len);
- if (err != 0) {
- (*errst) = -1;
- return err;
- }
-
- if (len != (size * sizeof(int32_t))) {
- (*errst) = -1;
- return -EINVAL;
- }
-
- for (vindex = 0; vindex < size; vindex++) {
- value[vindex] = ntoh32(*((int32_t *)ptr + vindex));
- if (value[vindex] < 0)
- ret = -1;
- }
-
- return ret;
-}
-
/* Code to spawn a synctask to heal custom xattrs from the hashed subvol
 to non-hashed subvols
*/
int
-dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno)
{
dht_local_t *copy_local = NULL;
call_frame_t *copy = NULL;
@@ -1204,6 +1265,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"No gfid exists for path %s "
"so healing xattr is not possible",
local->loc.path);
+ *op_errno = EIO;
goto out;
}
@@ -1217,6 +1279,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"Memory allocation failed "
"for path %s gfid %s ",
local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
DHT_STACK_DESTROY(copy);
} else {
copy_local->stbuf = local->stbuf;
@@ -1231,6 +1294,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"Synctask creation failed to heal xattr "
"for path %s gfid %s ",
local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
DHT_STACK_DESTROY(copy);
}
}
@@ -1239,7 +1303,7 @@ out:
return ret;
}
-int
+static int
dht_needs_selfheal(call_frame_t *frame, xlator_t *this)
{
dht_local_t *local = NULL;
@@ -1264,6 +1328,26 @@ dht_needs_selfheal(call_frame_t *frame, xlator_t *this)
return needs_selfheal;
}
+static int
+is_permission_different(ia_prot_t *prot1, ia_prot_t *prot2)
+{
+ if ((prot1->owner.read != prot2->owner.read) ||
+ (prot1->owner.write != prot2->owner.write) ||
+ (prot1->owner.exec != prot2->owner.exec) ||
+ (prot1->group.read != prot2->group.read) ||
+ (prot1->group.write != prot2->group.write) ||
+ (prot1->group.exec != prot2->group.exec) ||
+ (prot1->other.read != prot2->other.read) ||
+ (prot1->other.write != prot2->other.write) ||
+ (prot1->other.exec != prot2->other.exec) ||
+ (prot1->suid != prot2->suid) || (prot1->sgid != prot2->sgid) ||
+ (prot1->sticky != prot2->sticky)) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
int
dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
@@ -1351,13 +1435,31 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
dht_aggregate_xattr(local->xattr, xattr);
}
+ if (__is_root_gfid(stbuf->ia_gfid)) {
+ ret = dht_dir_has_layout(xattr, conf->xattr_name);
+ if (ret >= 0) {
+ if (is_greater_time(local->prebuf.ia_ctime,
+ local->prebuf.ia_ctime_nsec,
+ stbuf->ia_ctime, stbuf->ia_ctime_nsec)) {
+ /* Choose source */
+ local->prebuf.ia_gid = stbuf->ia_gid;
+ local->prebuf.ia_uid = stbuf->ia_uid;
+
+ local->prebuf.ia_ctime = stbuf->ia_ctime;
+ local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec;
+ local->prebuf.ia_prot = stbuf->ia_prot;
+ }
+ }
+ }
+
if (local->stbuf.ia_type != IA_INVAL) {
- /* This is not the first subvol to respond */
- if (!__is_root_gfid(stbuf->ia_gfid) &&
- ((local->stbuf.ia_gid != stbuf->ia_gid) ||
- (local->stbuf.ia_uid != stbuf->ia_uid) ||
- (is_permission_different(&local->stbuf.ia_prot,
- &stbuf->ia_prot)))) {
+ /* This is not the first subvol to respond
+ * Compare values to see if attrs need to be healed
+ */
+ if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+ (local->stbuf.ia_uid != stbuf->ia_uid) ||
+ (is_permission_different(&local->stbuf.ia_prot,
+ &stbuf->ia_prot))) {
local->need_attrheal = 1;
}
}
@@ -1377,6 +1479,9 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto unlock;
}
+ /* Save the mds subvol info and stbuf. This is the value that will
+ * be used for healing
+ */
local->mds_subvol = prev;
local->mds_stbuf = *stbuf;
@@ -1390,6 +1495,7 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
mds_xattr_val, 1, &errst);
if ((check_mds < 0) && !errst) {
+ /* Check if xattrs need to be healed on the directories */
local->mds_xattr = dict_ref(xattr);
gf_msg_debug(this->name, 0,
"%s: %s is not zero on %s. Xattrs need to be healed."
@@ -1474,24 +1580,57 @@ out:
return ret;
}
-int
-is_permission_different(ia_prot_t *prot1, ia_prot_t *prot2)
+static int
+dht_lookup_directory(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- if ((prot1->owner.read != prot2->owner.read) ||
- (prot1->owner.write != prot2->owner.write) ||
- (prot1->owner.exec != prot2->owner.exec) ||
- (prot1->group.read != prot2->group.read) ||
- (prot1->group.write != prot2->group.write) ||
- (prot1->group.exec != prot2->group.exec) ||
- (prot1->other.read != prot2->other.read) ||
- (prot1->other.write != prot2->other.write) ||
- (prot1->other.exec != prot2->other.exec) ||
- (prot1->suid != prot2->suid) || (prot1->sgid != prot2->sgid) ||
- (prot1->sticky != prot2->sticky)) {
- return 1;
- } else {
- return 0;
+ int call_cnt = 0;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, unwind);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, unwind);
+ GF_VALIDATE_OR_GOTO("dht", this->private, unwind);
+ GF_VALIDATE_OR_GOTO("dht", loc, unwind);
+
+ conf = this->private;
+ local = frame->local;
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ goto unwind;
+ }
+
+ if (local->xattr != NULL) {
+ dict_unref(local->xattr);
+ local->xattr = NULL;
+ }
+
+ if (!gf_uuid_is_null(local->gfid)) {
+ /* use this gfid in order to heal any missing ones */
+ ret = dict_set_gfuuid(local->xattr_req, "gfid-req", local->gfid, true);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: Failed to set dictionary value:"
+ " key = gfid-req",
+ local->loc.path);
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(
+ frame, dht_lookup_dir_cbk, conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc, local->xattr_req);
}
+ return 0;
+unwind:
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+out:
+ return 0;
}
int
@@ -1512,7 +1651,7 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
uint32_t vol_commit_hash = 0;
xlator_t *subvol = NULL;
int32_t check_mds = 0;
- int errst = 0;
+ int errst = 0, i = 0;
int32_t mds_xattr_val[1] = {0};
GF_VALIDATE_OR_GOTO("dht", frame, err);
@@ -1526,6 +1665,8 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
conf = this->private;
if (!conf->vch_forced) {
+ /* Update the commithash value if available
+ */
ret = dict_get_uint32(xattr, conf->commithash_xattr_name,
&vol_commit_hash);
if (ret == 0) {
@@ -1577,11 +1718,21 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->need_lookup_everywhere = 1;
} else if (IA_ISDIR(local->loc.inode->ia_type)) {
+ layout = local->layout;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == prev) {
+ layout->list[i].err = op_errno;
+ break;
+ }
+ }
+
local->need_selfheal = 1;
}
}
- /* The GFID is missing on this subvol*/
+ /* The GFID is missing on this subvol. Lookup everywhere to force a
+ * gfid heal
+ */
if ((op_errno == ENODATA) &&
(IA_ISDIR(local->loc.inode->ia_type))) {
local->need_lookup_everywhere = 1;
@@ -1663,6 +1814,8 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path, prev->name);
}
if ((check_mds < 0) && !errst) {
+ /* Check if xattrs need to be healed on the directory
+ */
local->mds_xattr = dict_ref(xattr);
gf_msg_debug(this->name, 0,
"Value of %s is not zero on "
@@ -1678,6 +1831,8 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
ret = dht_layout_dir_mismatch(this, layout, prev, &local->loc,
xattr);
if (ret != 0) {
+ /* In memory layout does not match on-disk layout.
+ */
gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_MISMATCH,
"Mismatching layouts for %s, gfid = %s", local->loc.path,
gfid);
@@ -1704,6 +1859,8 @@ unlock:
UNLOCK(&frame->lock);
if (follow_link) {
+ /* Found a linkto file. Follow it to see if the target file exists
+ */
gf_uuid_copy(local->gfid, stbuf->ia_gfid);
subvol = dht_linkfile_subvol(this, inode, stbuf, xattr);
@@ -1733,6 +1890,7 @@ unlock:
local->need_xattr_heal = 0;
if (IA_ISDIR(local->stbuf.ia_type)) {
+ /* No mds xattr found. Trigger a heal to set it */
if (!__is_root_gfid(local->loc.inode->gfid) &&
(!dict_get(local->xattr, conf->mds_xattr_key)))
local->need_selfheal = 1;
@@ -1822,12 +1980,11 @@ err:
return ret;
}
-int
-dht_lookup_linkfile_create_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+static int
+dht_lookup_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
xlator_t *cached_subvol = NULL;
@@ -1889,7 +2046,7 @@ out:
return ret;
}
-int
+static int
dht_lookup_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
@@ -1915,7 +2072,7 @@ dht_lookup_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_lookup_unlink_of_false_linkto_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
struct iatt *preparent,
@@ -1967,7 +2124,7 @@ dht_lookup_unlink_of_false_linkto_cbk(call_frame_t *frame, void *cookie,
return 0;
}
-int
+static int
dht_lookup_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
struct iatt *preparent,
@@ -2000,38 +2157,25 @@ dht_lookup_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
return 0;
}
-int
+static int
dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict)
{
int ret = 0;
- xlator_t *this = NULL;
- char *linktoskip_key = NULL;
-
- this = THIS;
- GF_VALIDATE_OR_GOTO("dht", this, err);
-
- if (dht_is_tier_xlator(this))
- linktoskip_key = TIER_SKIP_NON_LINKTO_UNLINK;
- else
- linktoskip_key = DHT_SKIP_NON_LINKTO_UNLINK;
- ret = dict_set_int32(dict, linktoskip_key, 1);
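+    /* These keys tell the posix layer to skip the unlink if the target is
+     * no longer a linkto file or still has open fds, so a file under
+     * migration is not lost */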
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_NON_LINKTO_UNLINK, 1);
if (ret)
- goto err;
+ return -1;
- ret = dict_set_int32(dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
if (ret)
- goto err;
+ return -1;
return 0;
-
-err:
- return -1;
}
-int32_t
+static int32_t
dht_linkfile_create_lookup_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf, dict_t *xdata,
@@ -2105,7 +2249,7 @@ no_linkto:
return 0;
}
-int32_t
+static int32_t
dht_call_lookup_linkfile_create(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
@@ -2191,7 +2335,7 @@ err:
* dht_lookup_everywhere_done takes decision based on any of the above case
*/
-int
+static int
dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this)
{
int ret = 0;
@@ -2505,7 +2649,7 @@ unwind_hashed_and_cached:
return 0;
}
-int
+static int
dht_lookup_everywhere_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, dict_t *xattr,
@@ -2841,116 +2985,12 @@ out:
return 0;
}
-int
-dht_lookup_directory(call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- int call_cnt = 0;
- int i = 0;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int ret = 0;
-
- GF_VALIDATE_OR_GOTO("dht", frame, out);
- GF_VALIDATE_OR_GOTO("dht", this, unwind);
- GF_VALIDATE_OR_GOTO("dht", frame->local, unwind);
- GF_VALIDATE_OR_GOTO("dht", this->private, unwind);
- GF_VALIDATE_OR_GOTO("dht", loc, unwind);
-
- conf = this->private;
- local = frame->local;
-
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
-
- local->layout = dht_layout_new(this, conf->subvolume_cnt);
- if (!local->layout) {
- goto unwind;
- }
-
- if (local->xattr != NULL) {
- dict_unref(local->xattr);
- local->xattr = NULL;
- }
-
- if (!gf_uuid_is_null(local->gfid)) {
- ret = dict_set_gfuuid(local->xattr_req, "gfid-req", local->gfid, true);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = gfid-req",
- local->loc.path);
- }
-
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND_COOKIE(
- frame, dht_lookup_dir_cbk, conf->subvolumes[i], conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup, &local->loc, local->xattr_req);
- }
- return 0;
-unwind:
- DHT_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
-out:
- return 0;
-}
-
/* Code to get the hashed subvol based on inode and loc.
First it checks whether loc->parent and loc->path exist, then it gets the
hashed subvol based on loc.
*/
-xlator_t *
-dht_inode_get_hashed_subvol(inode_t *inode, xlator_t *this, loc_t *loc)
-{
- char *path = NULL;
- loc_t populate_loc = {
- 0,
- };
- char *name = NULL;
- xlator_t *hash_subvol = NULL;
-
- if (!inode)
- return hash_subvol;
-
- if (loc && loc->parent && loc->path) {
- if (!loc->name) {
- name = strrchr(loc->path, '/');
- if (name) {
- loc->name = name + 1;
- } else {
- goto out;
- }
- }
- hash_subvol = dht_subvol_get_hashed(this, loc);
- goto out;
- }
-
- if (!gf_uuid_is_null(inode->gfid)) {
- populate_loc.inode = inode_ref(inode);
- populate_loc.parent = inode_parent(populate_loc.inode, NULL, NULL);
- inode_path(populate_loc.inode, NULL, &path);
-
- if (!path)
- goto out;
-
- populate_loc.path = path;
- if (!populate_loc.name && populate_loc.path) {
- name = strrchr(populate_loc.path, '/');
- if (name) {
- populate_loc.name = name + 1;
-
- } else {
- goto out;
- }
- }
- hash_subvol = dht_subvol_get_hashed(this, &populate_loc);
- }
-out:
- if (populate_loc.inode)
- loc_wipe(&populate_loc);
- return hash_subvol;
-}
-
-gf_boolean_t
+static gf_boolean_t
dht_should_lookup_everywhere(xlator_t *this, dht_conf_t *conf, loc_t *loc)
{
dht_layout_t *parent_layout = NULL;
@@ -3063,10 +3103,12 @@ dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* or any more call which happens from this 'loc'.
*/
if (gf_uuid_is_null(local->gfid)) {
+        /* This is set from the first successful response */
memcpy(local->gfid, stbuf->ia_gfid, 16);
}
if (!conf->vch_forced) {
+ /* Update the commit hash in conf if it is found */
ret = dict_get_uint32(xattr, conf->commithash_xattr_name,
&vol_commit_hash);
if (ret == 0) {
@@ -3076,6 +3118,8 @@ dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
is_dir = check_is_dir(inode, stbuf, xattr);
if (is_dir) {
+ /* A directory is present on all subvols, send the lookup to
+ * all subvols now */
local->inode = inode_ref(inode);
local->xattr = dict_ref(xattr);
dht_lookup_directory(frame, this, &local->loc);
@@ -3085,7 +3129,9 @@ dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
if (!is_linkfile) {
- /* non-directory and not a linkfile */
+    /* Non-directory and not a linkto file: this is a data file.
+     * Update the layout to point to the cached subvol.
+ */
ret = dht_layout_preset(this, prev, inode);
if (ret < 0) {
@@ -3099,6 +3145,9 @@ dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
goto out;
}
+ /* This is a linkto file. Get the value of the target subvol from the
+ * linkto xattr and lookup there to see if the file exists
+ */
subvol = dht_linkfile_subvol(this, inode, stbuf, xattr);
if (!subvol) {
gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
@@ -3139,7 +3188,7 @@ err:
* xlator), if not, request for them. These xattrs are needed for dht dir
* self-heal to perform proper self-healing of dirs
*/
-void
+static void
dht_check_and_set_acl_xattr_req(xlator_t *this, dict_t *xattr_req)
{
int ret = 0;
@@ -3170,7 +3219,7 @@ dht_check_and_set_acl_xattr_req(xlator_t *this, dict_t *xattr_req)
* the mds information : trusted.glusterfs.dht.mds
* the acl info: See above
*/
-int
+static int
dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
int ret = -EINVAL;
@@ -3211,50 +3260,109 @@ err:
return ret;
}
-int
-dht_set_file_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req)
+/* If the hashed subvol is present, send the lookup to only that subvol first.
+ * If no hashed subvol, send a lookup to all subvols and proceed based on the
+ * responses.
+ */
+static int
+dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int ret = -EINVAL;
+ int ret = -1;
dht_conf_t *conf = NULL;
+ xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int call_cnt = 0;
+ int i = 0;
conf = this->private;
if (!conf) {
+ op_errno = EINVAL;
goto err;
}
- if (!xattr_req) {
+ local = frame->local;
+ if (!local) {
+ op_errno = EINVAL;
goto err;
}
- /* Used to check whether this is a linkto file.
- */
- ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s",
- conf->link_xattr_name, loc->path);
+    /* Since we don't know whether this is a file or a directory,
+     * request all xattrs */
+ ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ op_errno = -ret;
goto err;
}
- /* This is used to make sure we don't unlink linkto files
- * which are the target of an ongoing file migration.
- */
- ret = dict_set_uint32(xattr_req, GLUSTERFS_OPEN_FD_COUNT, 4);
+ ret = dht_set_dir_xattr_req(this, loc, local->xattr_req);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s",
- GLUSTERFS_OPEN_FD_COUNT, loc->path);
+ op_errno = -ret;
goto err;
}
- ret = 0;
+ /* Fuse sets a random value in gfid-req. If the gfid is missing
+ * on one or more subvols, posix will set the gfid to this value,
+ * causing GFID mismatches for directories. Remove the value fuse
+ * has sent before sending the lookup.
+ */
+ ret = dict_get_gfuuid(local->xattr_req, "gfid-req", &local->gfid_req);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "%s: No gfid-req available", loc->path);
+ } else {
+ dict_del(local->xattr_req, "gfid-req");
+ }
+ /* This should have been set in dht_lookup */
+ hashed_subvol = local->hashed_subvol;
+
+ if (!hashed_subvol) {
+ gf_msg_debug(this->name, 0,
+ "%s: no subvolume in layout for path, "
+ "checking on all the subvols to see if "
+ "it is a directory",
+ loc->path);
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ /* Allocate a layout. This will be populated and saved in
+ * the dht inode_ctx on successful lookup
+ */
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ gf_msg_debug(this->name, 0,
+ "%s: Found null hashed subvol. Calling lookup"
+ " on all nodes.",
+ loc->path);
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
+ }
+ return 0;
+ }
+
+ /* if the hashed_subvol is non-null, send the lookup there first so
+ * as to see whether we have a file or a directory */
+ gf_msg_debug(this->name, 0, "%s: Calling fresh lookup on %s", loc->path,
+ hashed_subvol->name);
+
+ STACK_WIND_COOKIE(frame, dht_lookup_cbk, hashed_subvol, hashed_subvol,
+ hashed_subvol->fops->lookup, loc, local->xattr_req);
+ return 0;
err:
- return ret;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
-int
+static int
dht_do_revalidate(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
xlator_t *subvol = NULL;
@@ -3329,6 +3437,11 @@ dht_do_revalidate(call_frame_t *frame, xlator_t *this, loc_t *loc)
}
local->mds_subvol = mds_subvol;
local->call_cnt = conf->subvolume_cnt;
+
+ /* local->call_cnt will change as responses are processed. Always use a
+ * local copy to loop through the STACK_WIND calls
+ */
+
call_cnt = local->call_cnt;
for (i = 0; i < call_cnt; i++) {
@@ -3362,99 +3475,11 @@ err:
return 0;
}
-int
-dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- int ret = -1;
- dht_conf_t *conf = NULL;
- xlator_t *hashed_subvol = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int call_cnt = 0;
- int i = 0;
-
- conf = this->private;
- if (!conf) {
- op_errno = EINVAL;
- goto err;
- }
-
- local = frame->local;
- if (!local) {
- op_errno = EINVAL;
- goto err;
- }
-
- /* Since we don't know whether this is a file or a directory,
- * request all xattrs*/
- ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
- if (ret) {
- op_errno = -ret;
- goto err;
- }
-
- ret = dht_set_dir_xattr_req(this, loc, local->xattr_req);
- if (ret) {
- op_errno = -ret;
- goto err;
- }
-
- /* Fuse sets a random value in gfid-req. If the gfid is missing
- * on one or more subvols, posix will set the gfid to this value,
- * causing GFID mismatches for directories.
- */
- ret = dict_get_gfuuid(local->xattr_req, "gfid-req", &local->gfid_req);
- if (ret) {
- gf_msg_debug(this->name, 0, "%s: No gfid-req available", loc->path);
- } else {
- dict_del(local->xattr_req, "gfid-req");
- }
- /* This should have been set in dht_lookup */
- hashed_subvol = local->hashed_subvol;
-
- if (!hashed_subvol) {
- gf_msg_debug(this->name, 0,
- "%s: no subvolume in layout for path, "
- "checking on all the subvols to see if "
- "it is a directory",
- loc->path);
-
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
-
- local->layout = dht_layout_new(this, conf->subvolume_cnt);
- if (!local->layout) {
- op_errno = ENOMEM;
- goto err;
- }
-
- gf_msg_debug(this->name, 0,
- "%s: Found null hashed subvol. Calling lookup"
- " on all nodes.",
- loc->path);
-
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i],
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup, &local->loc,
- local->xattr_req);
- }
- return 0;
- }
-
- /* if we have the hashed_subvol, send the lookup there first so
- * as to see whether we have a file or a directory */
- gf_msg_debug(this->name, 0, "%s: Calling fresh lookup on %s", loc->path,
- hashed_subvol->name);
-
- STACK_WIND_COOKIE(frame, dht_lookup_cbk, hashed_subvol, hashed_subvol,
- hashed_subvol->fops->lookup, loc, local->xattr_req);
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
+/* Depending on the input, decide if this is a:
+ * fresh-lookup: loc->name is provided but no dht inode ctx
+ * revalidation: loc->name is provided, dht inode ctx is present
+ * discover: gfid based nameless lookup.
+ */
int
dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
@@ -3508,6 +3533,10 @@ dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
/* Nameless lookup */
+ /* This is usually sent by NFS. Lookups are done based on the gfid and
+ * no name information is available. Without the name, dht cannot calculate
+ * the hash and has to send a lookup to all subvols.
+ */
if (gf_uuid_is_null(loc->pargfid) && !gf_uuid_is_null(loc->gfid) &&
!__is_root_gfid(loc->inode->gfid)) {
local->cached_subvol = NULL;
@@ -3516,6 +3545,9 @@ dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
}
if (loc_is_root(loc)) {
+ /* Request the DHT commit hash xattr (trusted.glusterfs.dht.commithash)
+ * set on the brick root.
+ */
ret = dict_set_uint32(local->xattr_req, conf->commithash_xattr_name,
sizeof(uint32_t));
}
@@ -3524,12 +3556,14 @@ dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
hashed_subvol = dht_subvol_get_hashed(this, loc);
local->hashed_subvol = hashed_subvol;
- /* The entry has been looked up before and has an inode_ctx set
- */
if (is_revalidate(loc)) {
+ /* The entry has been looked up before and has a dht inode_ctx
+ */
dht_do_revalidate(frame, this, loc);
return 0;
} else {
+ /* Entry has not been looked up before
+ */
dht_do_fresh_lookup(frame, this, loc);
return 0;
}
@@ -3541,7 +3575,7 @@ err:
return 0;
}
-int
+static int
dht_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
@@ -3575,7 +3609,7 @@ post_unlock:
return 0;
}
-int
+static int
dht_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
@@ -3666,7 +3700,7 @@ dht_fix_layout_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_err_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, dict_t *xdata)
{
@@ -3738,7 +3772,7 @@ dht_dict_set_array(dict_t *dict, char *key, int32_t value[], int32_t size)
return ret;
}
-int
+static int
dht_common_mds_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict,
dict_t *xdata)
@@ -3781,7 +3815,7 @@ out:
/* Code to wind a xattrop call to add 1 on current mds internal xattr
value
*/
-int
+static int
dht_setxattr_non_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -3894,7 +3928,7 @@ just_return:
return 0;
}
-int
+static int
dht_setxattr_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -3981,7 +4015,7 @@ just_return:
return 0;
}
-int
+static int
dht_xattrop_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
{
@@ -4094,7 +4128,7 @@ dht_fill_pathinfo_xattr(xlator_t *this, dht_local_t *local, char *xattr_buf,
}
}
-int
+static int
dht_vgetxattr_alloc_and_fill(dht_local_t *local, dict_t *xattr, xlator_t *this,
int op_errno)
{
@@ -4143,7 +4177,7 @@ out:
return ret;
}
-int
+static int
dht_vgetxattr_fill_and_set(dht_local_t *local, dict_t **dict, xlator_t *this,
gf_boolean_t flag)
{
@@ -4199,7 +4233,7 @@ out:
return ret;
}
-int
+static int
dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr,
dict_t *xdata)
@@ -4241,8 +4275,11 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = -1;
local->op_errno = op_errno;
UNLOCK(&frame->lock);
- gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED,
- "getxattr err for dir");
+ if (op_errno == ENODATA)
+ gf_msg_debug(this->name, 0, "failed to get node-uuid");
+ else
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_GET_XATTR_FAILED, "failed to get node-uuid");
goto post_unlock;
}
@@ -4264,6 +4301,8 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
index = conf->local_subvols_cnt;
uuid_list_copy = gf_strdup(uuid_list);
+ if (!uuid_list_copy)
+ goto unlock;
for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str;
uuid_str = next_uuid_str) {
@@ -4353,7 +4392,7 @@ out:
return 0;
}
-int
+static int
dht_vgetxattr_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
@@ -4419,7 +4458,7 @@ out:
return 0;
}
-int
+static int
dht_vgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, dict_t *xattr, dict_t *xdata)
{
@@ -4465,7 +4504,7 @@ cleanup:
return 0;
}
-int
+static int
dht_linkinfo_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr,
dict_t *xdata)
@@ -4487,7 +4526,7 @@ dht_linkinfo_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_mds_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
@@ -4528,6 +4567,7 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int this_call_cnt = 0;
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
+ int ret = 0;
VALIDATE_OR_GOTO(frame, err);
VALIDATE_OR_GOTO(frame->local, err);
@@ -4536,6 +4576,13 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
conf = this->private;
local = frame->local;
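+    /* If the fop failed because the fd was never opened on the new cached
+     * subvol (file migrated while the fd was open), open it there and
+     * retry the fop */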
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto err;
+ return 0;
+ }
+
LOCK(&frame->lock);
{
if (!xattr || (op_ret == -1)) {
@@ -4546,18 +4593,8 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
dict_del(xattr, conf->xattr_name);
dict_del(xattr, conf->mds_xattr_key);
- /* filter out following two xattrs that need not
- * be visible on the mount point for geo-rep -
- * trusted.tier.fix.layout.complete and
- * trusted.tier.tier-dht.commithash
- */
-
dict_del(xattr, conf->commithash_xattr_name);
- if (frame->root->pid >= 0 && dht_is_tier_xlator(this)) {
- dict_del(xattr, GF_XATTR_TIER_LAYOUT_FIXED_KEY);
- }
-
if (frame->root->pid >= 0) {
GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr);
@@ -4598,7 +4635,7 @@ err:
return 0;
}
-int32_t
+static int32_t
dht_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, dict_t *dict,
dict_t *xdata)
{
@@ -4606,7 +4643,7 @@ dht_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, dict_t *dict,
return 0;
}
-int
+static int
dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
dict_t *xattr, dict_t *xdata)
@@ -4707,7 +4744,7 @@ post_unlock:
return 0;
}
-int
+static int
dht_getxattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *key, dict_t *xdata)
{
@@ -4734,7 +4771,7 @@ dht_getxattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc,
return 0;
}
-int
+static int
dht_marker_populate_args(call_frame_t *frame, int type, int *gauge,
xlator_t **subvols)
{
@@ -4751,8 +4788,8 @@ dht_marker_populate_args(call_frame_t *frame, int type, int *gauge,
return layout->cnt;
}
-int
-dht_is_debug_xattr_key(char **array, char *key)
+static int
+dht_is_debug_xattr_key(const char **array, char *key)
{
int i = 0;
@@ -4766,7 +4803,7 @@ dht_is_debug_xattr_key(char **array, char *key)
/* Note we already have frame->local initialised here*/
-int
+static int
dht_handle_debug_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *key)
{
@@ -4778,10 +4815,6 @@ dht_handle_debug_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *name = NULL;
local = frame->local;
- if (!key) {
- op_errno = EINVAL;
- goto out;
- }
if (dht_is_debug_xattr_key(dht_dbg_vxattrs, (char *)key) == -1) {
goto out;
@@ -4835,6 +4868,60 @@ out:
return 0;
}
+/* Virtual xattr which returns 1 if all subvols are up,
+   else returns 0. Geo-rep queries this virtual xattr
+   after a fresh mount and starts I/O only if all
+   subvols are up.
+*/
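+/* For illustration only (hypothetical mount path; the actual key string is
+ * whatever DHT_SUBVOL_STATUS_KEY expands to in dht-common.h):
+ *
+ *     getfattr -n dht.subvol.status /mnt/glusterfs
+ *
+ * would return 1 only when every dht subvolume is up. */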
+
+enum dht_vxattr_subvol {
+ DHT_VXATTR_SUBVOLS_UP = 1,
+ DHT_VXATTR_SUBVOLS_DOWN = 0,
+};
+
+int
+dht_vgetxattr_subvol_status(call_frame_t *frame, xlator_t *this,
+ const char *key)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENODATA;
+ int value = DHT_VXATTR_SUBVOLS_UP;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ local = frame->local;
+
+ if (!key) {
+ op_errno = EINVAL;
+ goto out;
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ value = DHT_VXATTR_SUBVOLS_DOWN;
+ gf_msg_debug(this->name, 0, "subvol %s is down ",
+ conf->subvolumes[i]->name);
+ break;
+ }
+ }
+ ret = dict_set_int8(local->xattr, (char *)key, value);
+ if (ret < 0) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL);
+ return 0;
+}
+
int
dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
dict_t *xdata)
@@ -4892,6 +4979,11 @@ dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
goto err;
}
+ if (strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) {
+ dht_vgetxattr_subvol_status(frame, this, key);
+ return 0;
+ }
+
/* skip over code which is irrelevant if !DHT_IS_DIR(layout) */
if (!DHT_IS_DIR(layout))
goto no_dht_is_dir;
@@ -5187,6 +5279,53 @@ err:
return 0;
}
+static int
+dht_setxattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto err;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+         * pass on the original error so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ local->rebalance.xdata);
+ return 0;
+ }
+
+ if (subvol == NULL)
+ goto err;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (local->fop == GF_FOP_SETXATTR) {
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->setxattr, &local->loc,
+ local->rebalance.xattr, local->rebalance.flags,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->fsetxattr, local->fd,
+ local->rebalance.xattr, local->rebalance.flags,
+ local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND(setxattr, frame, (local ? local->op_ret : -1), op_errno,
+ NULL);
+ return 0;
+}
+
int
dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
@@ -5203,8 +5342,8 @@ dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
- if ((local->fop == GF_FOP_FSETXATTR) && op_ret == -1 &&
- (op_errno == EBADF) && !(local->fd_checked)) {
+ if ((local->fop == GF_FOP_FSETXATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -5277,7 +5416,7 @@ dht_is_user_xattr(dict_t *this, char *key, data_t *value, void *data)
/* Common code to wind a (f)(set|remove)xattr call to set xattr on directory
*/
-int
+static int
dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
fd_t *fd, dict_t *xattr, int flags,
dict_t *xdata, int *op_errno)
@@ -5294,11 +5433,13 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
int call_cnt = 0;
dht_local_t *local = NULL;
char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char **xattrs_to_heal;
conf = this->private;
local = frame->local;
call_cnt = conf->subvolume_cnt;
local->flags = flags;
+ xattrs_to_heal = get_xattrs_to_heal();
if (!gf_uuid_is_null(local->gfid)) {
gf_uuid_unparse(local->gfid, gfid_local);
@@ -5530,7 +5671,7 @@ err:
return 0;
}
-int
+static int
dht_checking_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr,
dict_t *xdata)
@@ -5569,54 +5710,7 @@ out:
return 0;
}
-int
-dht_setxattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
-{
- dht_local_t *local = NULL;
- int op_errno = EINVAL;
-
- if (!frame || !frame->local)
- goto err;
-
- local = frame->local;
- op_errno = local->op_errno;
-
- if (we_are_not_migrating(ret)) {
- /* This dht xlator is not migrating the file. Unwind and
- * pass on the original mode bits so the higher DHT layer
- * can handle this.
- */
- DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
- local->rebalance.xdata);
- return 0;
- }
-
- if (subvol == NULL)
- goto err;
-
- local->call_cnt = 2; /* This is the second attempt */
-
- if (local->fop == GF_FOP_SETXATTR) {
- STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
- subvol->fops->setxattr, &local->loc,
- local->rebalance.xattr, local->rebalance.flags,
- local->xattr_req);
- } else {
- STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
- subvol->fops->fsetxattr, local->fd,
- local->rebalance.xattr, local->rebalance.flags,
- local->xattr_req);
- }
-
- return 0;
-
-err:
- DHT_STACK_UNWIND(setxattr, frame, (local ? local->op_ret : -1), op_errno,
- NULL);
- return 0;
-}
-
-int
+static int
dht_nuke_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
@@ -5625,7 +5719,7 @@ dht_nuke_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_nuke_dir(call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *tmp)
{
if (!IA_ISDIR(loc->inode->ia_type)) {
@@ -5778,22 +5872,7 @@ dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
if (local->rebalance.target_node) {
local->flags = forced_rebalance;
- /* Flag to suggest its a tiering migration
- * The reason for this dic key-value is that
- * promotions and demotions are multithreaded
- * so the original frame from gf_defrag_start()
- * is not carried. A new frame will be created when
- * we do syncop_setxattr(). This does not have the
- * frame->root->pid of the original frame. So we pass
- * this dic key-value when we do syncop_setxattr() to do
- * data migration and set the frame->root->pid to
- * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before
- * calling dht_start_rebalance_task() */
- tmp = dict_get(xattr, TIERING_MIGRATION_KEY);
- if (tmp)
- frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG;
- else
- frame->root->pid = GF_CLIENT_PID_DEFRAG;
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
ret = dht_start_rebalance_task(this, frame);
if (!ret)
@@ -5912,6 +5991,50 @@ err:
return 0;
}
+static int
+dht_removexattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto err;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+         * pass on the original error so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ local->rebalance.xdata);
+ return 0;
+ }
+
+ if (subvol == NULL)
+ goto err;
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->removexattr, &local->loc, local->key,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->fremovexattr, local->fd, local->key,
+ local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
+ return 0;
+}
+
int
dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
@@ -5928,8 +6051,8 @@ dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
- if ((local->fop == GF_FOP_FREMOVEXATTR) && (op_ret == -1) &&
- (op_errno == EBADF) && !(local->fd_checked)) {
+ if ((local->fop == GF_FOP_FREMOVEXATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -5989,84 +6112,6 @@ out:
}
int
-dht_removexattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
-{
- dht_local_t *local = NULL;
- int op_errno = EINVAL;
-
- if (!frame || !frame->local)
- goto err;
-
- local = frame->local;
- op_errno = local->op_errno;
-
- local->call_cnt = 2; /* This is the second attempt */
-
- if (we_are_not_migrating(ret)) {
- /* This dht xlator is not migrating the file. Unwind and
- * pass on the original mode bits so the higher DHT layer
- * can handle this.
- */
- DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
- local->rebalance.xdata);
- return 0;
- }
-
- if (subvol == NULL)
- goto err;
-
- if (local->fop == GF_FOP_REMOVEXATTR) {
- STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
- subvol->fops->removexattr, &local->loc, local->key,
- local->xattr_req);
- } else {
- STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
- subvol->fops->fremovexattr, local->fd, local->key,
- local->xattr_req);
- }
-
- return 0;
-
-err:
- DHT_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
- return 0;
-}
-
-int
-dht_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- xlator_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- LOCK(&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- UNLOCK(&frame->lock);
- gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
- prev->name);
- goto post_unlock;
- }
-
- local->op_ret = 0;
- }
- UNLOCK(&frame->lock);
-post_unlock:
- this_call_cnt = dht_frame_return(frame);
- if (is_last_call(this_call_cnt)) {
- DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
- NULL);
- }
-
- return 0;
-}
-
-int
dht_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *key, dict_t *xdata)
{
@@ -6264,7 +6309,7 @@ post_unlock:
/*
* dht_normalize_stats -
*/
-void
+static void
dht_normalize_stats(struct statvfs *buf, unsigned long bsize,
unsigned long frsize)
{
@@ -6283,7 +6328,7 @@ dht_normalize_stats(struct statvfs *buf, unsigned long bsize,
}
}
-int
+static int
dht_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct statvfs *statvfs, dict_t *xdata)
{
@@ -6393,9 +6438,7 @@ dht_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
int i = -1;
inode_t *inode = NULL;
inode_table_t *itable = NULL;
- uuid_t root_gfid = {
- 0,
- };
+ static uuid_t root_gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
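+    /* The root directory always has the well-known gfid
+     * 00000000-0000-0000-0000-000000000001 */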
loc_t newloc = {
0,
};
@@ -6421,7 +6464,6 @@ dht_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
}
loc = &local->loc2;
- root_gfid[15] = 1;
inode = inode_find(itable, root_gfid);
if (!inode) {
@@ -6539,7 +6581,7 @@ err:
this information layout can be constructed and set in inode.
*/
-void
+static void
dht_populate_inode_for_dentry(xlator_t *this, xlator_t *subvol,
gf_dirent_t *entry, gf_dirent_t *orig_entry)
{
@@ -6586,7 +6628,7 @@ out:
/* Posix returns op_errno = ENOENT to indicate that there are no more
* entries
*/
-int
+static int
dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
{
@@ -6642,10 +6684,9 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
layout = local->layout;
- /* We have seen crashes in while running "rm -rf" on tier volumes
- when the layout was NULL on the hot tier. This will skip the
- entries on the subvol without a layout, hence preventing the crash
- but rmdir might fail with "directory not empty" errors*/
+    /* Skip the entries on any subvol that has no layout; this
+     * prevents a crash, but rmdir may fail with
+     * "directory not empty" errors */
if (layout == NULL)
goto done;
@@ -6663,13 +6704,12 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
orig_entry->d_name, orig_entry->d_type);
if (IA_ISINVAL(orig_entry->d_stat.ia_type)) {
- /*stat failed somewhere- ignore this entry*/
- gf_msg_debug(this->name, EINVAL,
- "Invalid stat, ignoring entry "
- "%s gfid %s",
+            /* The stat failed somewhere; display this entry anyway, but
+             * its data may be inaccurate.
+ */
+ gf_msg_debug(this->name, EINVAL, "Invalid stat for %s (gfid %s)",
orig_entry->d_name,
uuid_utoa(orig_entry->d_stat.ia_gfid));
- continue;
}
if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict,
@@ -6878,7 +6918,7 @@ unwind:
return 0;
}
-int
+static int
dht_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
{
@@ -7001,7 +7041,7 @@ unwind:
return 0;
}
-int
+static int
dht_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t yoff, int whichop, dict_t *dict)
{
@@ -7125,7 +7165,7 @@ dht_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
return 0;
}
-int
+static int
dht_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, dict_t *xdata)
{
@@ -7255,7 +7295,7 @@ out:
return 0;
}
-int
+static int
dht_mknod_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
@@ -7306,7 +7346,7 @@ err:
return 0;
}
-int
+static int
dht_mknod_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
xlator_t *subvol, loc_t *loc, dev_t rdev,
mode_t mode, mode_t umask, dict_t *params)
@@ -7352,7 +7392,7 @@ out:
return 0;
}
-int32_t
+static int32_t
dht_mknod_do(call_frame_t *frame)
{
dht_local_t *local = NULL;
@@ -7402,7 +7442,7 @@ err:
return 0;
}
-int32_t
+static int32_t
dht_mknod_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -7410,7 +7450,7 @@ dht_mknod_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int32_t
+static int32_t
dht_mknod_finish(call_frame_t *frame, xlator_t *this, int op_ret,
int invoke_cbk)
{
@@ -7462,7 +7502,7 @@ done:
return 0;
}
-int32_t
+static int32_t
dht_mknod_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -7499,7 +7539,7 @@ err:
return 0;
}
-int32_t
+static int32_t
dht_mknod_lock(call_frame_t *frame, xlator_t *subvol)
{
dht_local_t *local = NULL;
@@ -7544,7 +7584,7 @@ err:
return -1;
}
-int
+static int
dht_refresh_parent_layout_resume(call_frame_t *frame, xlator_t *this, int ret,
int invoke_cbk)
{
@@ -7574,7 +7614,7 @@ dht_refresh_parent_layout_resume(call_frame_t *frame, xlator_t *this, int ret,
return 0;
}
-int
+static int
dht_refresh_parent_layout_done(call_frame_t *frame)
{
dht_local_t *local = NULL;
@@ -7595,7 +7635,7 @@ resume:
return 0;
}
-int
+static int
dht_handle_parent_layout_change(xlator_t *this, call_stub_t *stub)
{
call_frame_t *refresh_frame = NULL, *frame = NULL;
@@ -7630,7 +7670,7 @@ dht_handle_parent_layout_change(xlator_t *this, call_stub_t *stub)
return 0;
}
-int32_t
+static int32_t
dht_call_mkdir_stub(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -7653,7 +7693,7 @@ dht_call_mkdir_stub(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int32_t
+static int32_t
dht_guard_parent_layout_and_namespace(xlator_t *subvol, call_stub_t *stub)
{
dht_local_t *local = NULL;
@@ -7964,7 +8004,58 @@ err:
return 0;
}
-int
+static int
+dht_remove_stale_linkto_cbk(int ret, call_frame_t *sync_frame, void *data)
+{
+ DHT_STACK_DESTROY(sync_frame);
+ return 0;
+}
+
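+/* Uses the blocking syncop_unlink, so this is expected to run in a
+ * synctask context rather than directly in the fop path */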
+static int
+dht_remove_stale_linkto(void *data)
+{
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ dict_t *xdata_in = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", data, out);
+
+ frame = data;
+ local = frame->local;
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", local, out);
+ GF_VALIDATE_OR_GOTO("dht", local->link_subvol, out);
+
+ xdata_in = dict_new();
+ if (!xdata_in)
+ goto out;
+
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(xdata_in);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
+               "Failed to set keys for stale linkto "
+               "deletion on path %s",
+ local->loc.path);
+ goto out;
+ }
+
+ ret = syncop_unlink(local->link_subvol, &local->loc, xdata_in, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
+ "Removal of linkto failed"
+ " on path %s at subvol %s",
+ local->loc.path, local->link_subvol->name);
+ }
+out:
+ if (xdata_in)
+ dict_unref(xdata_in);
+ return ret;
+}
+
+static int
dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, inode_t *inode, struct iatt *stbuf,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
@@ -8080,7 +8171,7 @@ out:
return 0;
}
-int
+static int
dht_link2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -8136,7 +8227,7 @@ err:
return 0;
}
-int
+static int
dht_link_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
@@ -8239,6 +8330,11 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
xlator_t *prev = NULL;
int ret = -1;
dht_local_t *local = NULL;
+ gf_boolean_t parent_layout_changed = _gf_false;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ xlator_t *subvol = NULL;
+
local = frame->local;
if (!local) {
@@ -8247,8 +8343,69 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
goto out;
}
- if (op_ret == -1)
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ parent_layout_changed = (xdata &&
+ dict_get(xdata, GF_PREOP_CHECK_FAILED))
+ ? _gf_true
+ : _gf_false;
+
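+        /* GF_PREOP_CHECK_FAILED is set by the brick when the parent layout
+         * sent along with the create no longer matches the on-disk layout */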
+ if (parent_layout_changed) {
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ /* Returning failure as the layout could not be fixed even under
+ * the lock */
+ goto out;
+ }
+
+ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "create (%s/%s) (path: %s): parent layout "
+ "changed. Attempting a layout refresh and then a "
+ "retry",
+ pgfid, local->loc.name, local->loc.path);
+
+ /*
+            dht_refresh_layout needs directory info in local->loc. Hence,
+ storing the parent_loc in local->loc and storing the create
+ context in local->loc2. We will restore this information in
+ dht_creation_do.
+ */
+
+ loc_wipe(&local->loc2);
+
+ ret = loc_copy(&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", local->loc.path);
+
+ goto out;
+ }
+
+ loc_wipe(&local->loc);
+
+ ret = dht_build_parent_loc(this, &local->loc, &local->loc2,
+ &op_errno);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED,
+ "parent loc build failed");
+ goto out;
+ }
+
+ subvol = dht_subvol_get_hashed(this, &local->loc2);
+
+ ret = dht_create_lock(frame, subvol);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto out;
+ }
+
+ return 0;
+ }
+
goto out;
+ }
prev = cookie;
@@ -8303,7 +8460,7 @@ out:
return 0;
}
-int
+static int
dht_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
@@ -8354,7 +8511,7 @@ err:
return 0;
}
-int
+static int
dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
xlator_t *subvol, loc_t *loc, int32_t flags,
mode_t mode, mode_t umask, fd_t *fd,
@@ -8369,6 +8526,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
subvol->name);
+ dht_set_parent_layout_in_dict(loc, this, local);
+
STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
subvol->fops->create, loc, flags, mode, umask, fd,
params);
@@ -8377,10 +8536,6 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
avail_subvol = dht_free_disk_available_subvol(this, subvol, local);
if (avail_subvol != subvol) {
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
local->cached_subvol = avail_subvol;
local->hashed_subvol = subvol;
@@ -8396,6 +8551,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
subvol->name);
+ dht_set_parent_layout_in_dict(loc, this, local);
+
STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
subvol->fops->create, loc, flags, mode, umask, fd,
params);
@@ -8464,7 +8621,7 @@ out:
return ret;
}
-int32_t
+static int32_t
dht_create_do(call_frame_t *frame)
{
dht_local_t *local = NULL;
@@ -8514,7 +8671,7 @@ err:
return 0;
}
-int32_t
+static int32_t
dht_create_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -8522,7 +8679,7 @@ dht_create_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int32_t
+static int32_t
dht_create_finish(call_frame_t *frame, xlator_t *this, int op_ret,
int invoke_cbk)
{
@@ -8574,7 +8731,7 @@ done:
return 0;
}
-int32_t
+static int32_t
dht_create_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -8657,6 +8814,60 @@ err:
}
int
+dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local)
+{
+ dht_conf_t *conf = this->private;
+ dht_layout_t *parent_layout = NULL;
+ int *parent_disk_layout = NULL;
+ xlator_t *hashed_subvol = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ parent_layout = dht_layout_get(this, loc->parent);
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+
+ ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol,
+ &parent_disk_layout);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "extracting in-memory layout of parent failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ ret = dict_set_str_sizen(local->params, GF_PREOP_PARENT_KEY,
+ conf->xattr_name);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting %s key in params dictionary failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path,
+ GF_PREOP_PARENT_KEY);
+ goto err;
+ }
+
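+    /* The on-disk layout for a single subvol is four 32-bit fields
+     * (count, hash type, start, stop), hence the 4 * 4 size below */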
+ ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout,
+ 4 * 4);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting parent-layout in params dictionary failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+err:
+ dht_layout_unref(this, parent_layout);
+ return ret;
+}
+
+int
dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
{
@@ -8682,6 +8893,11 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto err;
}
+ local->params = dict_ref(params);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+
if (dht_filter_loc_subvol_key(this, loc, &local->loc, &subvol)) {
gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
"creating %s on %s (got create on %s)", local->loc.path,
@@ -8697,10 +8913,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
if (hashed_subvol && (hashed_subvol != subvol)) {
/* Create the linkto file and then the data file */
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
local->cached_subvol = subvol;
local->hashed_subvol = hashed_subvol;
@@ -8713,6 +8925,9 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
* file as we expect a lookup everywhere if there are problems
* with the parent layout
*/
+
+ dht_set_parent_layout_in_dict(loc, this, local);
+
STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
subvol->fops->create, &local->loc, flags, mode, umask,
fd, params);
@@ -8764,11 +8979,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto err;
}
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
-
loc_wipe(&local->loc);
ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno);
@@ -8806,7 +9016,7 @@ err:
return 0;
}
-int
+static int
dht_mkdir_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -8840,7 +9050,7 @@ dht_mkdir_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, inode_t *inode, struct iatt *stbuf,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
@@ -8908,13 +9118,13 @@ unlock:
return 0;
}
-int
+static int
dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata);
-int
+static int
dht_mkdir_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
mode_t umask, dict_t *params)
{
@@ -9037,7 +9247,7 @@ err:
return 0;
}
-int
+static int
dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
@@ -9054,8 +9264,6 @@ dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gf_boolean_t parent_layout_changed = _gf_false;
call_stub_t *stub = NULL;
- VALIDATE_OR_GOTO(this->private, err);
-
local = frame->local;
prev = cookie;
layout = local->layout;
@@ -9170,7 +9378,7 @@ err:
return 0;
}
-int
+static int
dht_mkdir_guard_parent_layout_cbk(call_frame_t *frame, xlator_t *this,
loc_t *loc, mode_t mode, mode_t umask,
dict_t *params)
@@ -9321,7 +9529,7 @@ err:
return 0;
}
-int
+static int
dht_rmdir_selfheal_cbk(call_frame_t *heal_frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -9343,7 +9551,7 @@ dht_rmdir_selfheal_cbk(call_frame_t *heal_frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_rmdir_hashed_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
@@ -9446,7 +9654,63 @@ err:
return 0;
}
-int
+static int
+dht_rmdir_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
+
+static int
+dht_rmdir_unlock(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
+
+ local = frame->local;
+
+ /* Unlock entrylk */
+ dht_unlock_entrylk_wrapper(frame, &local->lock[0].ns.directory_ns);
+
+ /* Unlock inodelk */
+ lock_count = dht_lock_count(local->lock[0].ns.parent_layout.locks,
+ local->lock[0].ns.parent_layout.lk_count);
+
+ if (lock_count == 0)
+ goto done;
+
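+    /* The inodelk must be released after this frame unwinds, so move the
+     * lock list to a copied frame and unlock from there */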
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL)
+ goto done;
+
+ lock_local = dht_local_init(lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL)
+ goto done;
+
+ lock_local->lock[0].ns.parent_layout.locks = local->lock[0]
+ .ns.parent_layout.locks;
+ lock_local->lock[0]
+ .ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count;
+
+ local->lock[0].ns.parent_layout.locks = NULL;
+ local->lock[0].ns.parent_layout.lk_count = 0;
+ dht_unlock_inodelk(lock_frame, lock_local->lock[0].ns.parent_layout.locks,
+ lock_local->lock[0].ns.parent_layout.lk_count,
+ dht_rmdir_unlock_cbk);
+ lock_frame = NULL;
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
+
+ return 0;
+}
+
+static int
dht_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
@@ -9581,63 +9845,7 @@ err:
return 0;
}
-int
-dht_rmdir_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- DHT_STACK_DESTROY(frame);
- return 0;
-}
-
-int
-dht_rmdir_unlock(call_frame_t *frame, xlator_t *this)
-{
- dht_local_t *local = NULL, *lock_local = NULL;
- call_frame_t *lock_frame = NULL;
- int lock_count = 0;
-
- local = frame->local;
-
- /* Unlock entrylk */
- dht_unlock_entrylk_wrapper(frame, &local->lock[0].ns.directory_ns);
-
- /* Unlock inodelk */
- lock_count = dht_lock_count(local->lock[0].ns.parent_layout.locks,
- local->lock[0].ns.parent_layout.lk_count);
-
- if (lock_count == 0)
- goto done;
-
- lock_frame = copy_frame(frame);
- if (lock_frame == NULL)
- goto done;
-
- lock_local = dht_local_init(lock_frame, &local->loc, NULL,
- lock_frame->root->op);
- if (lock_local == NULL)
- goto done;
-
- lock_local->lock[0].ns.parent_layout.locks = local->lock[0]
- .ns.parent_layout.locks;
- lock_local->lock[0]
- .ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count;
-
- local->lock[0].ns.parent_layout.locks = NULL;
- local->lock[0].ns.parent_layout.lk_count = 0;
- dht_unlock_inodelk(lock_frame, lock_local->lock[0].ns.parent_layout.locks,
- lock_local->lock[0].ns.parent_layout.lk_count,
- dht_rmdir_unlock_cbk);
- lock_frame = NULL;
-
-done:
- if (lock_frame != NULL) {
- DHT_STACK_DESTROY(lock_frame);
- }
-
- return 0;
-}
-
-int
+static int
dht_rmdir_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -9646,8 +9854,6 @@ dht_rmdir_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int i = 0;
xlator_t *hashed_subvol;
- VALIDATE_OR_GOTO(this->private, err);
-
conf = this->private;
local = frame->local;
@@ -9680,7 +9886,7 @@ err:
return 0;
}
-int
+static int
dht_rmdir_do(call_frame_t *frame, xlator_t *this)
{
dht_local_t *local = NULL;
@@ -9744,7 +9950,63 @@ err:
return 0;
}
-int
+static void
+dht_rmdir_readdirp_done(call_frame_t *readdirp_frame, xlator_t *this)
+{
+ call_frame_t *main_frame = NULL;
+ dht_local_t *main_local = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+
+ local = readdirp_frame->local;
+ main_frame = local->main_frame;
+ main_local = main_frame->local;
+
+ /* At least one readdirp failed.
+ * This is a bit hit or miss - if readdirp failed on more than
+ * one subvol, we don't know which error is returned.
+ */
+ if (local->op_ret == -1) {
+ main_local->op_ret = local->op_ret;
+ main_local->op_errno = local->op_errno;
+ }
+
+ this_call_cnt = dht_frame_return(main_frame);
+
+ if (is_last_call(this_call_cnt))
+ dht_rmdir_do(main_frame, this);
+
+ DHT_STACK_DESTROY(readdirp_frame);
+}
+
+/* Keep sending readdirp on the subvol until it returns no more entries.
+ * It is possible that not all entries will fit in a single readdirp, in
+ * which case the rmdir will keep failing with ENOTEMPTY.
+ */
+
+static int
+dht_rmdir_readdirp_do(call_frame_t *readdirp_frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+
+ local = readdirp_frame->local;
+
+ if (local->op_ret == -1) {
+        /* There is no point doing another readdirp on this
+         * subvol. */
+ dht_rmdir_readdirp_done(readdirp_frame, this);
+ return 0;
+ }
+
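+    /* Wind a 4KB readdirp batch; stale linkto entries found here are
+     * cleaned up, while any real entry aborts the rmdir */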
+ STACK_WIND_COOKIE(readdirp_frame, dht_rmdir_readdirp_cbk,
+ local->hashed_subvol, local->hashed_subvol,
+ local->hashed_subvol->fops->readdirp, local->fd, 4096, 0,
+ local->xattr);
+
+ return 0;
+}
+
+static int
dht_rmdir_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
@@ -9788,7 +10050,7 @@ dht_rmdir_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_rmdir_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, dict_t *xattr, struct iatt *parent)
@@ -9843,7 +10105,7 @@ err:
return 0;
}
-int
+static int
dht_rmdir_cached_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, dict_t *xattr,
@@ -9923,7 +10185,7 @@ err:
return 0;
}
-int
+static int
dht_rmdir_is_subvol_empty(call_frame_t *frame, xlator_t *this,
gf_dirent_t *entries, xlator_t *src)
{
@@ -10084,36 +10346,7 @@ err:
* No more entries on this subvol. Proceed to the actual rmdir operation.
*/
-void
-dht_rmdir_readdirp_done(call_frame_t *readdirp_frame, xlator_t *this)
-{
- call_frame_t *main_frame = NULL;
- dht_local_t *main_local = NULL;
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
-
- local = readdirp_frame->local;
- main_frame = local->main_frame;
- main_local = main_frame->local;
-
- /* At least one readdirp failed.
- * This is a bit hit or miss - if readdirp failed on more than
- * one subvol, we don't know which error is returned.
- */
- if (local->op_ret == -1) {
- main_local->op_ret = local->op_ret;
- main_local->op_errno = local->op_errno;
- }
-
- this_call_cnt = dht_frame_return(main_frame);
-
- if (is_last_call(this_call_cnt))
- dht_rmdir_do(main_frame, this);
-
- DHT_STACK_DESTROY(readdirp_frame);
-}
-
-int
+static int
dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, gf_dirent_t *entries,
dict_t *xdata)
@@ -10163,34 +10396,7 @@ dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-/* Keep sending readdirp on the subvol until it returns no more entries
- * It is possible that not all entries will fit in a single readdirp in
- * which case the rmdir will keep failing with ENOTEMPTY
- */
-
-int
-dht_rmdir_readdirp_do(call_frame_t *readdirp_frame, xlator_t *this)
-{
- dht_local_t *local = NULL;
-
- local = readdirp_frame->local;
-
- if (local->op_ret == -1) {
- /* there is no point doing another readdirp on this
- * subvol . */
- dht_rmdir_readdirp_done(readdirp_frame, this);
- return 0;
- }
-
- STACK_WIND_COOKIE(readdirp_frame, dht_rmdir_readdirp_cbk,
- local->hashed_subvol, local->hashed_subvol,
- local->hashed_subvol->fops->readdirp, local->fd, 4096, 0,
- local->xattr);
-
- return 0;
-}
-
-int
+static int
dht_rmdir_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
@@ -10383,7 +10589,7 @@ err:
return 0;
}
-int
+static int
dht_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
@@ -10443,7 +10649,7 @@ err:
return 0;
}
-int
+static int
dht_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
@@ -10490,7 +10696,7 @@ err:
return 0;
}
-int32_t
+static int32_t
dht_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
{
@@ -10607,23 +10813,17 @@ dht_notify(xlator_t *this, int event, void *data, ...)
int had_heard_from_all = 0;
int have_heard_from_all = 0;
- struct timeval time = {
- 0,
- };
gf_defrag_info_t *defrag = NULL;
dict_t *dict = NULL;
gf_defrag_type cmd = 0;
dict_t *output = NULL;
va_list ap;
- dht_methods_t *methods = NULL;
struct gf_upcall *up_data = NULL;
struct gf_upcall_cache_invalidation *up_ci = NULL;
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, out);
- methods = &(conf->methods);
-
/* had all subvolumes reported status once till now? */
had_heard_from_all = 1;
for (i = 0; i < conf->subvolume_cnt; i++) {
@@ -10653,12 +10853,11 @@ dht_notify(xlator_t *this, int event, void *data, ...)
break;
}
- gettimeofday(&time, NULL);
LOCK(&conf->subvolume_lock);
{
conf->subvolume_status[cnt] = 1;
conf->last_event[cnt] = event;
- conf->subvol_up_time[cnt] = time.tv_sec;
+ conf->subvol_up_time[cnt] = gf_time();
}
UNLOCK(&conf->subvolume_lock);
@@ -10766,21 +10965,13 @@ dht_notify(xlator_t *this, int event, void *data, ...)
if (defrag->is_exiting)
goto unlock;
if ((cmd == GF_DEFRAG_CMD_STATUS) ||
- (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
(cmd == GF_DEFRAG_CMD_DETACH_STATUS))
gf_defrag_status_get(conf, output);
- else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
- gf_defrag_start_detach_tier(defrag);
else if (cmd == GF_DEFRAG_CMD_DETACH_START)
defrag->cmd = GF_DEFRAG_CMD_DETACH_START;
else if (cmd == GF_DEFRAG_CMD_STOP ||
- cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER ||
cmd == GF_DEFRAG_CMD_DETACH_STOP)
gf_defrag_stop(conf, GF_DEFRAG_STATUS_STOPPED, output);
- else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER)
- ret = gf_defrag_pause_tier(this, defrag);
- else if (cmd == GF_DEFRAG_CMD_RESUME_TIER)
- ret = gf_defrag_resume_tier(this, defrag);
}
unlock:
UNLOCK(&defrag->lock);
@@ -10828,6 +11019,7 @@ dht_notify(xlator_t *this, int event, void *data, ...)
}
if (!had_heard_from_all && have_heard_from_all) {
+ static int run_defrag = 0;
/* This is the first event which completes aggregation
of events from all subvolumes. If at least one subvol
had come up, propagate CHILD_UP, but only this time
@@ -10854,15 +11046,13 @@ dht_notify(xlator_t *this, int event, void *data, ...)
* thread has already started.
*/
if (conf->defrag && !run_defrag) {
- if (methods->migration_needed(this)) {
- run_defrag = 1;
- ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start,
- this, "dhtdg");
- if (ret) {
- GF_FREE(conf->defrag);
- conf->defrag = NULL;
- kill(getpid(), SIGTERM);
- }
+ run_defrag = 1;
+ ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start,
+ this, "dhtdg");
+ if (ret) {
+ GF_FREE(conf->defrag);
+ conf->defrag = NULL;
+ kill(getpid(), SIGTERM);
}
}
}
@@ -10943,8 +11133,7 @@ dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc,
for (i = 0; i < layout->cnt; i++) {
ret = snprintf(string, sizeof(string),
"[Subvol_name: %s, Err: %d , Start: "
- "%" PRIu32 " , Stop: %" PRIu32 " , Hash: %" PRIu32
- " ], ",
+ "0x%x, Stop: 0x%x, Hash: 0x%x], ",
layout->list[i].xlator->name, layout->list[i].err,
layout->list[i].start, layout->list[i].stop,
layout->list[i].commit_hash);
@@ -10973,8 +11162,7 @@ dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc,
for (i = 0; i < layout->cnt; i++) {
ret = snprintf(output_string + off, len - off,
"[Subvol_name: %s, Err: %d , Start: "
- "%" PRIu32 " , Stop: %" PRIu32 " , Hash: %" PRIu32
- " ], ",
+ "0x%x, Stop: 0x%x, Hash: 0x%x], ",
layout->list[i].xlator->name, layout->list[i].err,
layout->list[i].start, layout->list[i].stop,
layout->list[i].commit_hash);
@@ -11009,28 +11197,6 @@ out:
return ret;
}
-int32_t
-dht_migration_needed(xlator_t *this)
-{
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- int ret = 0;
-
- conf = this->private;
-
- GF_VALIDATE_OR_GOTO("dht", conf, out);
- GF_VALIDATE_OR_GOTO("dht", conf->defrag, out);
-
- defrag = conf->defrag;
-
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER))
- ret = 1;
-
-out:
- return ret;
-}
-
/*
This function should not be called more than once during a FOP
handling path. It is valid only for ops on files
@@ -11065,72 +11231,13 @@ dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf,
return 0;
}
-gf_boolean_t
-dht_is_tier_xlator(xlator_t *this)
-{
- if (strcmp(this->type, "cluster/tier") == 0)
- return _gf_true;
- return _gf_false;
-}
-
int32_t
dht_release(xlator_t *this, fd_t *fd)
{
return dht_fd_ctx_destroy(this, fd);
}
-int
-dht_remove_stale_linkto(void *data)
-{
- call_frame_t *frame = NULL;
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
- dict_t *xdata_in = NULL;
- int ret = 0;
-
- GF_VALIDATE_OR_GOTO("dht", data, out);
-
- frame = data;
- local = frame->local;
- this = frame->this;
- GF_VALIDATE_OR_GOTO("dht", this, out);
- GF_VALIDATE_OR_GOTO("dht", local, out);
- GF_VALIDATE_OR_GOTO("dht", local->link_subvol, out);
-
- xdata_in = dict_new();
- if (!xdata_in)
- goto out;
-
- ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(xdata_in);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
- "Failed to set keys for stale linkto"
- "deletion on path %s",
- local->loc.path);
- goto out;
- }
-
- ret = syncop_unlink(local->link_subvol, &local->loc, xdata_in, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
- "Removal of linkto failed"
- " on path %s at subvol %s",
- local->loc.path, local->link_subvol->name);
- }
-out:
- if (xdata_in)
- dict_unref(xdata_in);
- return ret;
-}
-
-int
-dht_remove_stale_linkto_cbk(int ret, call_frame_t *sync_frame, void *data)
-{
- DHT_STACK_DESTROY(sync_frame);
- return 0;
-}
-
-int
+static int
dht_pt_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, inode_t *inode, struct iatt *stbuf,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
@@ -11216,6 +11323,8 @@ dht_pt_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
conf = this->private;
dict_del(xattr, conf->xattr_name);
+ dict_del(xattr, conf->mds_xattr_key);
+ dict_del(xattr, conf->commithash_xattr_name);
if (frame->root->pid >= 0) {
GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
@@ -11261,3 +11370,22 @@ dht_pt_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata);
return 0;
}
+
+/* The job of this function is to check whether every xlator has
+ * recorded an error in the layout. */
+int
+dht_dir_layout_error_check(xlator_t *this, inode_t *inode)
+{
+ dht_layout_t *layout = NULL;
+ int i = 0;
+
+ layout = dht_layout_get(this, inode);
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == 0) {
+ return 0;
+ }
+ }
+
+ /* All xlators have errors; return the first one. */
+ return layout->list[0].err;
+}
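A toy harness for the check above, with a plain int array standing in for layout->list[i].err; it yields the first error only when every slot is non-zero:

    #include <stdio.h>

    /* Toy version: err[] stands for layout->list[i].err. */
    static int
    layout_error_check(const int *err, int cnt)
    {
        int i;

        for (i = 0; i < cnt; i++) {
            if (err[i] == 0)
                return 0; /* at least one subvol is healthy */
        }
        return err[0]; /* all failed; report the first error */
    }

    int
    main(void)
    {
        int all_bad[] = {5, 13, 13}; /* EIO, EACCES, EACCES */
        int one_ok[] = {5, 0, 13};

        /* Prints "5 0". */
        printf("%d %d\n", layout_error_check(all_bad, 3),
               layout_error_check(one_ok, 3));
        return 0;
    }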
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index c516271228e..fe0dc3db34a 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -9,8 +9,6 @@
*/
#include <regex.h>
-#include <signal.h>
-#include <fnmatch.h>
#include "dht-mem-types.h"
#include "dht-messages.h"
@@ -26,7 +24,6 @@
#define _DHT_H
#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
-#define GF_XATTR_TIER_LAYOUT_FIXED_KEY "trusted.tier.fix.layout.complete"
#define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data"
#define DHT_MDS_STR "mds"
#define GF_DHT_LOOKUP_UNHASHED_OFF 0
@@ -38,22 +35,21 @@
#define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal"
/* Namespace synchronization */
#define DHT_ENTRY_SYNC_DOMAIN "dht.entry.sync"
-#define TIERING_MIGRATION_KEY "tiering.migration"
#define DHT_LAYOUT_HASH_INVALID 1
#define MAX_REBAL_THREADS sysconf(_SC_NPROCESSORS_ONLN)
#define DHT_DIR_STAT_BLOCKS 8
#define DHT_DIR_STAT_SIZE 4096
+/* Virtual xattr for subvol status */
+
+#define DHT_SUBVOL_STATUS_KEY "dht.subvol.status"
+
/* Virtual xattrs for debugging */
#define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*"
#define DHT_DBG_HASHED_SUBVOL_KEY "dht.file.hashed-subvol."
-/* Array to hold custom xattr keys
- */
-extern char *xattrs_to_heal[];
-
/* Rebalance nodeuuid flags */
#define REBAL_NODEUUID_MINE 0x01
@@ -152,8 +148,8 @@ struct dht_rebalance_ {
dict_t *xdata;
dict_t *xattr;
dict_t *dict;
- int32_t set;
struct gf_flock flock;
+ int32_t set;
int lock_cmd;
};
@@ -176,24 +172,24 @@ typedef enum {
} dht_reaction_type_t;
struct dht_skip_linkto_unlink {
- gf_boolean_t handle_valid_link;
- int opend_fd_count;
xlator_t *hash_links_to;
uuid_t cached_gfid;
uuid_t hashed_gfid;
+ int opend_fd_count;
+ gf_boolean_t handle_valid_link;
};
typedef struct {
xlator_t *xl;
loc_t loc; /* contains/points to inode to lock on. */
- short type; /* read/write lock. */
char *domain; /* Only locks within a single domain
* contend with each other
*/
char *basename; /* Required for entrylk */
- gf_lkowner_t lk_owner;
gf_boolean_t locked;
dht_reaction_type_t do_on_failure;
+ short type; /* read/write lock. */
+ gf_lkowner_t lk_owner;
} dht_lock_t;
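Several structures in this header (dht_lock_t here, dht_local_t, gf_defrag_info_t and dht_conf_t below) are reshuffled so that pointers and wide fields come first and small scalars cluster at the end, which reads like a padding-reduction pass. A quick standalone illustration of the effect (sizes assume a typical LP64 ABI; not gluster code):

    #include <stdio.h>

    struct before { char a; void *p; char b; void *q; }; /* padded twice */
    struct after  { void *p; void *q; char a; char b; }; /* padded once  */

    int
    main(void)
    {
        /* On a typical LP64 ABI this prints "32 24". */
        printf("%zu %zu\n", sizeof(struct before), sizeof(struct after));
        return 0;
    }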
/* The lock structure represents inodelk. */
@@ -244,23 +240,10 @@ typedef gf_boolean_t (*dht_need_heal_t)(call_frame_t *frame,
dht_layout_t **inmem,
dht_layout_t **ondisk);
-typedef struct {
- uint64_t blocks_used;
- uint64_t pblocks_used;
- uint64_t files_used;
- uint64_t pfiles_used;
- uint64_t unhashed_blocks_used;
- uint64_t unhashed_pblocks_used;
- uint64_t unhashed_files_used;
- uint64_t unhashed_pfiles_used;
- uint64_t unhashed_fsid;
- uint64_t hashed_fsid;
-} tier_statvfs_t;
-
struct dht_local {
- int call_cnt;
loc_t loc;
loc_t loc2;
+ int call_cnt;
int op_ret;
int op_errno;
int layout_mismatch;
@@ -274,7 +257,6 @@ struct dht_local {
struct iatt preparent;
struct iatt postparent;
struct statvfs statvfs;
- tier_statvfs_t tier_statvfs;
fd_t *fd;
inode_t *inode;
dict_t *params;
@@ -290,9 +272,6 @@ struct dht_local {
xlator_t *cached_subvol;
xlator_t *hashed_subvol;
xlator_t *mds_subvol; /* This is use for dir only */
- char need_selfheal;
- char need_xattr_heal;
- char need_attrheal;
int file_count;
int dir_count;
call_frame_t *main_frame;
@@ -310,12 +289,12 @@ struct dht_local {
uint32_t overlaps_cnt;
uint32_t down;
uint32_t misc;
- uint32_t missing_cnt;
dht_selfheal_dir_cbk_t dir_cbk;
dht_selfheal_layout_t healer;
dht_need_heal_t should_heal;
- gf_boolean_t force_mkdir;
dht_layout_t *layout, *refreshed_layout;
+ uint32_t missing_cnt;
+ gf_boolean_t force_mkdir;
} selfheal;
dht_refresh_layout_unlock refresh_layout_unlock;
@@ -325,16 +304,18 @@ struct dht_local {
uint32_t gid;
pid_t pid;
+ glusterfs_fop_t fop;
+
+ /* need for file-info */
+ char *xattr_val;
+ char *key;
+
/* needed by nufa */
int32_t flags;
mode_t mode;
dev_t rdev;
mode_t umask;
- /* need for file-info */
- char *xattr_val;
- char *key;
-
/* which xattr request? */
char xsel[256];
int32_t alloc_len;
@@ -343,33 +324,17 @@ struct dht_local {
uuid_t gfid;
uuid_t gfid_req;
- /* flag used to make sure we need to return estale in
- {lookup,revalidate}_cbk */
- char return_estale;
- char need_lookup_everywhere;
-
- glusterfs_fop_t fop;
-
- gf_boolean_t linked;
xlator_t *link_subvol;
struct dht_rebalance_ rebalance;
xlator_t *first_up_subvol;
- gf_boolean_t quota_deem_statfs;
-
- gf_boolean_t added_link;
- gf_boolean_t is_linkfile;
-
struct dht_skip_linkto_unlink skip_unlink;
dht_dir_transaction_t lock[2], *current;
/* inodelks during filerename for backward compatibility */
dht_lock_t **rename_inodelk_backward_compatible;
- int rename_inodelk_bc_count;
-
- short lock_type;
call_stub_t *stub;
int32_t parent_disk_layout[4];
@@ -377,13 +342,27 @@ struct dht_local {
/* rename rollback */
int *ret_cache;
- /* fd open check */
- gf_boolean_t fd_checked;
+ loc_t loc2_copy;
+
+ int rename_inodelk_bc_count;
/* This is use only for directory operation */
int32_t valid;
- gf_boolean_t heal_layout;
int32_t mds_heal_fresh_lookup;
- loc_t loc2_copy;
+ short lock_type;
+ char need_selfheal;
+ char need_xattr_heal;
+ char need_attrheal;
+ /* flag used to make sure we need to return estale in
+ {lookup,revalidate}_cbk */
+ char return_estale;
+ char need_lookup_everywhere;
+ /* fd open check */
+ gf_boolean_t fd_checked;
+ gf_boolean_t linked;
+ gf_boolean_t added_link;
+ gf_boolean_t is_linkfile;
+ gf_boolean_t quota_deem_statfs;
+ gf_boolean_t heal_layout;
gf_boolean_t locked;
gf_boolean_t dont_create_linkto;
gf_boolean_t gfid_missing;
@@ -410,14 +389,7 @@ enum gf_defrag_type {
GF_DEFRAG_CMD_STATUS = 1 + 2,
GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
GF_DEFRAG_CMD_START_FORCE = 1 + 4,
- GF_DEFRAG_CMD_START_TIER = 1 + 5,
- GF_DEFRAG_CMD_STATUS_TIER = 1 + 6,
- GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7,
- GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8,
- GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9,
- GF_DEFRAG_CMD_RESUME_TIER = 1 + 10,
GF_DEFRAG_CMD_DETACH_STATUS = 1 + 11,
- GF_DEFRAG_CMD_STOP_TIER = 1 + 12,
GF_DEFRAG_CMD_DETACH_START = 1 + 13,
GF_DEFRAG_CMD_DETACH_COMMIT = 1 + 14,
GF_DEFRAG_CMD_DETACH_COMMIT_FORCE = 1 + 15,
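Note that the surviving enumerators keep their original `1 + n` initializers even though the tier commands between them are gone; presumably these values are exchanged between the CLI and the rebalance daemon, so they must not be renumbered. The idiom in isolation:

    #include <stdio.h>

    /* Explicit initializers keep the surviving values stable when
     * intermediate enumerators are deleted. */
    enum cmd {
        CMD_START = 1 + 0,
        /* CMD_TIER = 1 + 1, removed; its value stays unused */
        CMD_STATUS = 1 + 2,
    };

    int
    main(void)
    {
        printf("%d %d\n", CMD_START, CMD_STATUS); /* prints "1 3" */
        return 0;
    }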
@@ -468,75 +440,6 @@ struct dht_container {
int local_subvol_index;
};
-typedef enum tier_mode_ {
- TIER_MODE_NONE = 0,
- TIER_MODE_TEST,
- TIER_MODE_WM
-} tier_mode_t;
-
-typedef enum tier_pause_state_ {
- TIER_RUNNING = 0,
- TIER_REQUEST_PAUSE,
- TIER_PAUSED
-} tier_pause_state_t;
-
-/* This Structure is only used in tiering fixlayout */
-typedef struct gf_tier_fix_layout_arg {
- xlator_t *this;
- dict_t *fix_layout;
- pthread_t thread_id;
-} gf_tier_fix_layout_arg_t;
-
-typedef struct gf_tier_conf {
- int is_tier;
- int watermark_hi;
- int watermark_low;
- int watermark_last;
- unsigned long block_size;
- fsblkcnt_t blocks_total;
- fsblkcnt_t blocks_used;
- int percent_full;
- uint64_t max_migrate_bytes;
- int max_migrate_files;
- int query_limit;
- tier_mode_t mode;
- /* These flags are only used for tier-compact */
- gf_boolean_t compact_active;
- /* These 3 flags are set to true when the client changes the */
- /* compaction mode on the command line. */
- /* When they are set, the daemon will trigger compaction as */
- /* soon as possible to activate or deactivate compaction. */
- /* If in the middle of a compaction, then the switches take */
- /* effect on the next compaction, not the current one. */
- /* If the user switches it off, we want to avoid needless */
- /* compactions. */
- /* If the user switches it on, they want to compact as soon */
- /* as possible. */
- gf_boolean_t compact_mode_switched;
- gf_boolean_t compact_mode_switched_hot;
- gf_boolean_t compact_mode_switched_cold;
- int tier_max_promote_size;
- int tier_promote_frequency;
- int tier_demote_frequency;
- int tier_compact_hot_frequency;
- int tier_compact_cold_frequency;
- uint64_t st_last_promoted_size;
- uint64_t st_last_demoted_size;
- tier_pause_state_t pause_state;
- struct synctask *pause_synctask;
- gf_timer_t *pause_timer;
- pthread_mutex_t pause_mutex;
- int promote_in_progress;
- int demote_in_progress;
- /* This Structure is only used in tiering fixlayout */
- gf_tier_fix_layout_arg_t tier_fix_layout_arg;
- /* Indicates the index of the first queryfile picked
- * in the last cycle of promote or demote */
- int32_t last_promote_qfile_index;
- int32_t last_demote_qfile_index;
- char volname[GD_VOLUME_NAME_MAX + 1];
-} gf_tier_conf_t;
-
typedef struct nodeuuid_info {
char info; /* Set to 1 if this is my node's uuid */
uuid_t uuid; /* Store the nodeuuid as well for debugging*/
@@ -556,26 +459,18 @@ struct gf_defrag_info_ {
uint64_t num_dirs_processed;
uint64_t size_processed;
gf_lock_t lock;
- int cmd;
pthread_t th;
- gf_defrag_status_t defrag_status;
struct rpc_clnt *rpc;
uint32_t connected;
uint32_t is_exiting;
pid_t pid;
+ int cmd;
inode_t *root_inode;
uuid_t node_uuid;
- struct timeval start_time;
- gf_boolean_t stats;
+ time_t start_time;
uint32_t new_commit_hash;
+ gf_defrag_status_t defrag_status;
gf_defrag_pattern_list_t *defrag_pattern;
- gf_tier_conf_t tier_conf;
-
- /*Data Tiering params for scanner*/
- uint64_t total_files_promoted;
- uint64_t total_files_demoted;
- int write_freq_threshold;
- int read_freq_threshold;
pthread_cond_t parallel_migration_cond;
pthread_mutex_t dfq_mutex;
@@ -590,18 +485,20 @@ struct gf_defrag_info_ {
/*Throttle params*/
/*stands for reconfigured thread count*/
int32_t recon_thread_count;
- /*stands for current running thread count*/
- int32_t current_thread_count;
pthread_cond_t df_wakeup_thread;
- /* lock migration flag */
- gf_boolean_t lock_migration_enabled;
-
/* backpointer to make it easier to write functions for rebalance */
xlator_t *this;
pthread_cond_t fc_wakeup_cond;
pthread_mutex_t fc_mutex;
+
+ /*stands for current running thread count*/
+ int32_t current_thread_count;
+
+ gf_boolean_t stats;
+ /* lock migration flag */
+ gf_boolean_t lock_migration_enabled;
};
typedef struct gf_defrag_info_ gf_defrag_info_t;
@@ -609,7 +506,6 @@ typedef struct gf_defrag_info_ gf_defrag_info_t;
struct dht_methods_s {
int32_t (*migration_get_dst_subvol)(xlator_t *this, dht_local_t *local);
int32_t (*migration_other)(xlator_t *this, gf_defrag_info_t *defrag);
- int32_t (*migration_needed)(xlator_t *this);
xlator_t *(*layout_search)(xlator_t *this, dht_layout_t *layout,
const char *name);
};
@@ -617,36 +513,26 @@ struct dht_methods_s {
typedef struct dht_methods_s dht_methods_t;
struct dht_conf {
- gf_lock_t subvolume_lock;
- int subvolume_cnt;
xlator_t **subvolumes;
char *subvolume_status;
int *last_event;
dht_layout_t **file_layouts;
dht_layout_t **dir_layouts;
unsigned int search_unhashed;
- gf_boolean_t lookup_optimize;
int gen;
dht_du_t *du_stats;
double min_free_disk;
double min_free_inodes;
- char disk_unit;
+ int subvolume_cnt;
int32_t refresh_interval;
- gf_boolean_t unhashed_sticky_bit;
- struct timeval last_stat_fetch;
+ gf_lock_t subvolume_lock;
+ time_t last_stat_fetch;
gf_lock_t layout_lock;
dict_t *leaf_to_subvol;
void *private; /* Can be used by wrapper xlators over
dht */
- gf_boolean_t use_readdirp;
- char vol_uuid[UUID_SIZE + 1];
- gf_boolean_t assert_no_child_down;
time_t *subvol_up_time;
- /* This is the count used as the distribute layout for a directory */
- /* Will be a global flag to control the layout spread count */
- uint32_t dir_spread_cnt;
-
/* to keep track of nodes which are decommissioned */
xlator_t **decommissioned_bricks;
int decommission_in_progress;
@@ -655,15 +541,9 @@ struct dht_conf {
/* defrag related */
gf_defrag_info_t *defrag;
- /* Request to filter directory entries in readdir request */
-
- gf_boolean_t readdir_optimize;
-
/* Support regex-based name reinterpretation. */
regex_t rsync_regex;
- gf_boolean_t rsync_regex_valid;
regex_t extra_regex;
- gf_boolean_t extra_regex_valid;
/* Support variable xattr names. */
char *xattr_name;
@@ -672,11 +552,6 @@ struct dht_conf {
char *commithash_xattr_name;
char *wild_xattr_name;
- /* Support size-weighted rebalancing (heterogeneous bricks). */
- gf_boolean_t do_weighting;
- gf_boolean_t randomize_by_gfid;
- int dthrottle;
-
dht_methods_t methods;
struct mem_pool *lock_pool;
@@ -686,24 +561,55 @@ struct dht_conf {
subvol_nodeuuids_info_t *local_nodeuuids;
int32_t local_subvols_cnt;
+ int dthrottle;
+
+ /* Needed to handle hard links during migration triggered from a client */
+ synclock_t link_lock;
+
+ /* lock migration */
+ gf_lock_t lock;
+
+ /* The layout spread count for a directory: a global knob that
+ * controls how many subvols a directory's layout spans */
+ uint32_t dir_spread_cnt;
+
/*
* "Commit hash" for this volume topology. Changed whenever bricks
* are added or removed.
*/
uint32_t vol_commit_hash;
- gf_boolean_t vch_forced;
- /* lock migration */
+ char vol_uuid[UUID_SIZE + 1];
+
+ char disk_unit;
gf_boolean_t lock_migration_enabled;
- gf_lock_t lock;
- /* Hard link handle requirement for migration triggered from client*/
- synclock_t link_lock;
+ gf_boolean_t vch_forced;
gf_boolean_t use_fallocate;
gf_boolean_t force_migration;
+
+ gf_boolean_t lookup_optimize;
+
+ gf_boolean_t unhashed_sticky_bit;
+
+ gf_boolean_t assert_no_child_down;
+
+ gf_boolean_t use_readdirp;
+
+ /* Request to filter directory entries in readdir request */
+ gf_boolean_t readdir_optimize;
+
+ gf_boolean_t rsync_regex_valid;
+
+ gf_boolean_t extra_regex_valid;
+
+ /* Support size-weighted rebalancing (heterogeneous bricks). */
+ gf_boolean_t do_weighting;
+
+ gf_boolean_t randomize_by_gfid;
};
typedef struct dht_conf dht_conf_t;
@@ -742,6 +648,8 @@ struct dir_dfmeta {
struct list_head **head;
struct list_head **iterator;
int *fetch_entries;
+ /* fds corresponding to local subvols only */
+ fd_t **lfd;
};
typedef struct dht_migrate_info {
@@ -817,22 +725,18 @@ typedef struct dht_fd_ctx {
dht_local_wipe(__xl, __local); \
} while (0)
-#define DHT_UPDATE_TIME(ctx_sec, ctx_nsec, new_sec, new_nsec, inode, post) \
+#define DHT_UPDATE_TIME(ctx_sec, ctx_nsec, new_sec, new_nsec, post) \
do { \
- LOCK(&inode->lock); \
- { \
- if (ctx_sec == new_sec) \
- new_nsec = max(new_nsec, ctx_nsec); \
- else if (ctx_sec > new_sec) { \
- new_sec = ctx_sec; \
- new_nsec = ctx_nsec; \
- } \
- if (post) { \
- ctx_sec = new_sec; \
- ctx_nsec = new_nsec; \
- } \
+ if (ctx_sec == new_sec) \
+ new_nsec = max(new_nsec, ctx_nsec); \
+ else if (ctx_sec > new_sec) { \
+ new_sec = ctx_sec; \
+ new_nsec = ctx_nsec; \
+ } \
+ if (post) { \
+ ctx_sec = new_sec; \
+ ctx_nsec = new_nsec; \
} \
- UNLOCK(&inode->lock); \
} while (0)
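With the locking hoisted out, DHT_UPDATE_TIME is now a pure merge: keep the newer (sec, nsec) pair and optionally write it back. Callers such as dht_inode_ctx_time_update() in dht-helper.c (further down) wrap the three invocations in a single LOCK/UNLOCK of inode->lock. The merge rule as a standalone function, with plain structs in place of the inode ctx fields:

    #include <stdio.h>

    struct ts {
        long sec;
        long nsec;
    };

    /* Keep the newer of (ctx, new); if post is set, fold the result back
     * into ctx as well -- the same rule DHT_UPDATE_TIME implements. */
    static void
    update_time(struct ts *ctx, struct ts *new_ts, int post)
    {
        if (ctx->sec == new_ts->sec) {
            if (ctx->nsec > new_ts->nsec)
                new_ts->nsec = ctx->nsec;
        } else if (ctx->sec > new_ts->sec) {
            *new_ts = *ctx;
        }
        if (post)
            *ctx = *new_ts;
    }

    int
    main(void)
    {
        struct ts ctx = {100, 500}, cur = {100, 200};

        update_time(&ctx, &cur, 1);
        printf("%ld.%09ld\n", cur.sec, cur.nsec); /* 100.000000500 */
        return 0;
    }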
#define is_greater_time(a, an, b, bn) \
@@ -877,7 +781,6 @@ dht_layout_anomalies(xlator_t *this, loc_t *loc, dht_layout_t *layout,
int
dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
loc_t *loc, dict_t *xattr);
-
xlator_t *
dht_linkfile_subvol(xlator_t *this, inode_t *inode, struct iatt *buf,
dict_t *xattr);
@@ -895,9 +798,6 @@ int
dht_disk_layout_extract(xlator_t *this, dht_layout_t *layout, int pos,
int32_t **disk_layout_p);
int
-dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
- void *disk_layout_raw, int disk_layout_len);
-int
dht_disk_layout_extract_for_subvol(xlator_t *this, dht_layout_t *layout,
xlator_t *subvol, int32_t **disk_layout_p);
@@ -933,25 +833,17 @@ dht_linkfile_create(call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
xlator_t *this, xlator_t *tovol, xlator_t *fromvol,
loc_t *loc);
int
-dht_lookup_directory(call_frame_t *frame, xlator_t *this, loc_t *loc);
-int
dht_lookup_everywhere(call_frame_t *frame, xlator_t *this, loc_t *loc);
int
dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
loc_t *loc, dht_layout_t *layout);
-
-int
-dht_selfheal_directory_for_nameless_lookup(call_frame_t *frame,
- dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
-
int
dht_selfheal_new_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
dht_layout_t *layout);
int
dht_selfheal_restore(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
loc_t *loc, dht_layout_t *layout);
-int
+void
dht_layout_sort_volname(dht_layout_t *layout);
int
@@ -968,14 +860,14 @@ dht_get_du_info_for_subvol(xlator_t *this, int subvol_idx);
int
dht_layout_preset(xlator_t *this, xlator_t *subvol, inode_t *inode);
int
-dht_layout_index_for_subvol(dht_layout_t *layout, xlator_t *subvol);
-int
dht_layout_set(xlator_t *this, inode_t *inode, dht_layout_t *layout);
;
void
dht_layout_unref(xlator_t *this, dht_layout_t *layout);
dht_layout_t *
dht_layout_ref(xlator_t *this, dht_layout_t *layout);
+int
+dht_layout_index_for_subvol(dht_layout_t *layout, xlator_t *subvol);
xlator_t *
dht_first_up_subvol(xlator_t *this);
xlator_t *
@@ -1230,25 +1122,19 @@ dht_newfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
int
-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
-
-void
-gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state);
-
-tier_pause_state_t
-gf_defrag_get_pause_state(gf_tier_conf_t *tier_conf);
+dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
int
-gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag);
-
-tier_pause_state_t
-gf_defrag_check_pause_tier(gf_tier_conf_t *defrag);
+dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xattr, dict_t *xdata);
int
-gf_defrag_resume_tier(xlator_t *this, gf_defrag_info_t *defrag);
-
+dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
int
-gf_defrag_start_detach_tier(gf_defrag_info_t *defrag);
+gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
int
gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output);
@@ -1281,10 +1167,6 @@ int
dht_dir_attr_heal(void *data);
int
dht_dir_attr_heal_done(int ret, call_frame_t *sync_frame, void *data);
-int
-dht_dir_has_layout(dict_t *xattr, char *name);
-gf_boolean_t
-dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator);
xlator_t *
dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
xlator_t *ignore, dht_layout_t *layout,
@@ -1293,15 +1175,18 @@ xlator_t *
dht_subvol_maxspace_nonzeroinode(xlator_t *this, xlator_t *subvol,
dht_layout_t *layout);
int
+dht_dir_has_layout(dict_t *xattr, char *name);
+int
dht_linkfile_attr_heal(call_frame_t *frame, xlator_t *this);
-void
-dht_layout_dump(dht_layout_t *layout, const char *prefix);
int32_t
dht_priv_dump(xlator_t *this);
int32_t
dht_inodectx_dump(xlator_t *this, inode_t *inode);
+gf_boolean_t
+dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator);
+
int
dht_inode_ctx_get_mig_info(xlator_t *this, inode_t *inode,
xlator_t **src_subvol, xlator_t **dst_subvol);
@@ -1315,11 +1200,6 @@ dht_subvol_status(dht_conf_t *conf, xlator_t *subvol);
void
dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc,
dht_layout_t *layout);
-int
-dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this);
-
-int
-dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict);
int
dht_layout_sort(dht_layout_t *layout);
@@ -1336,9 +1216,6 @@ dht_layout_missing_dirs(dht_layout_t *layout);
int
dht_refresh_layout(call_frame_t *frame);
-gf_boolean_t
-dht_is_tier_xlator(xlator_t *this);
-
int
dht_build_parent_loc(xlator_t *this, loc_t *parent, loc_t *child,
int32_t *op_errno);
@@ -1369,22 +1246,6 @@ dht_get_lock_subvolume(xlator_t *this, struct gf_flock *lock,
int
dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret);
-void
-dht_normalize_stats(struct statvfs *buf, unsigned long bsize,
- unsigned long frsize);
-
-int
-add_opt(char **optsp, const char *opt);
-
-int
-dht_aggregate_split_brain_xattr(dict_t *dst, char *key, data_t *value);
-
-int
-dht_remove_stale_linkto(void *data);
-
-int
-dht_remove_stale_linkto_cbk(int ret, call_frame_t *sync_frame, void *data);
-
int
dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *subvol);
@@ -1456,9 +1317,6 @@ dht_dir_heal_xattrs(void *data);
int
dht_dir_heal_xattrs_done(int ret, call_frame_t *sync_frame, void *data);
-void
-dht_aggregate_xattr(dict_t *dst, dict_t *src);
-
int32_t
dht_dict_set_array(dict_t *dict, char *key, int32_t value[], int32_t size);
@@ -1470,25 +1328,12 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
dict_t *src, int *uret, int *uflag);
int
-dht_dir_xattr_heal(xlator_t *this, dht_local_t *local);
-
-int32_t
-dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size,
- int *errst);
-
-xlator_t *
-dht_inode_get_hashed_subvol(inode_t *inode, xlator_t *this, loc_t *loc);
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno);
int
dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, int flag);
int
-dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata);
-
-int
-dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol);
-int
dht_inode_ctx_mdsvol_get(inode_t *inode, xlator_t *this, xlator_t **mdsvol);
int
@@ -1497,14 +1342,6 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
/* Abstract out the DHT-IATT-IN-DICT */
-int
-dht_request_iatt_in_xdata(xlator_t *this, dict_t *xattr_req);
-
-int
-dht_read_iatt_from_xdata(xlator_t *this, dict_t *xdata, struct iatt *stbuf);
-
-int
-is_permission_different(ia_prot_t *prot1, ia_prot_t *prot2);
void
dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc,
dht_layout_t *new_layout);
@@ -1525,4 +1362,23 @@ int
dht_pt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata);
+int32_t
+dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno);
+
+int
+dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+dht_create_lock(call_frame_t *frame, xlator_t *subvol);
+
+int
+dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local);
+
+int
+dht_dir_layout_error_check(xlator_t *this, inode_t *inode);
+
+int
+dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol);
#endif /* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 4d3905ab741..c0588828fdb 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -10,11 +10,7 @@
/* TODO: add NS locking */
-#include <glusterfs/glusterfs.h>
-#include <glusterfs/xlator.h>
#include "dht-common.h"
-#include "dht-messages.h"
-#include <glusterfs/defaults.h>
#include <sys/time.h>
#include <glusterfs/events.h>
@@ -155,22 +151,18 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
dht_conf_t *conf = NULL;
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
- struct timeval tv = {
- 0,
- };
loc_t tmp_loc = {
0,
};
+ time_t now;
conf = this->private;
-
- gettimeofday(&tv, NULL);
-
+ now = gf_time();
/* make it root gfid, should be enough to get the proper
info back */
tmp_loc.gfid[15] = 1;
- if (tv.tv_sec > (conf->refresh_interval + conf->last_stat_fetch.tv_sec)) {
+ if (now > (conf->refresh_interval + conf->last_stat_fetch)) {
statfs_frame = copy_frame(frame);
if (!statfs_frame) {
goto err;
@@ -202,7 +194,7 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
statfs_local->params);
}
- conf->last_stat_fetch.tv_sec = tv.tv_sec;
+ conf->last_stat_fetch = now;
}
return 0;
err:
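The hunk above is a plain time-based throttle: a statfs sweep is issued only when refresh_interval seconds have passed since last_stat_fetch, which is now a bare time_t. The same pattern in isolation, assuming gf_time() behaves like time(NULL) for this purpose:

    #include <stdio.h>
    #include <time.h>

    static time_t last_fetch;                  /* models conf->last_stat_fetch */
    static const time_t refresh_interval = 15; /* models conf->refresh_interval */

    /* Returns 1 when a fresh statfs sweep should be issued. */
    static int
    should_refresh(void)
    {
        time_t now = time(NULL); /* stand-in for gf_time() */

        if (now > last_fetch + refresh_interval) {
            last_fetch = now;
            return 1;
        }
        return 0;
    }

    int
    main(void)
    {
        printf("%d %d\n", should_refresh(), should_refresh()); /* "1 0" */
        return 0;
    }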
diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c
index 3def6b17666..acda67c312a 100644
--- a/xlators/cluster/dht/src/dht-hashfn.c
+++ b/xlators/cluster/dht/src/dht-hashfn.c
@@ -8,13 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/glusterfs.h>
-#include <glusterfs/xlator.h>
#include "dht-common.h"
#include <glusterfs/hashfn.h>
-int
-dht_hash_compute_internal(int type, const char *name, uint32_t *hash_p)
+static int
+dht_hash_compute_internal(int type, const char *name, const int len,
+ uint32_t *hash_p)
{
int ret = 0;
uint32_t hash = 0;
@@ -22,7 +21,7 @@ dht_hash_compute_internal(int type, const char *name, uint32_t *hash_p)
switch (type) {
case DHT_HASH_TYPE_DM:
case DHT_HASH_TYPE_DM_USER:
- hash = gf_dm_hashfn(name, strlen(name));
+ hash = gf_dm_hashfn(name, len);
break;
default:
ret = -1;
@@ -36,7 +35,12 @@ dht_hash_compute_internal(int type, const char *name, uint32_t *hash_p)
return ret;
}
-static gf_boolean_t
+/* The function returns:
+ *  0 : no munge took place
+ * >0 : the length (including the terminating NUL) of the newly
+ *      modified string, if it was munged.
+ */
+static int
dht_munge_name(const char *original, char *modified, size_t len, regex_t *re)
{
regmatch_t matches[2] = {
@@ -54,14 +58,14 @@ dht_munge_name(const char *original, char *modified, size_t len, regex_t *re)
if (new_len < len) {
memcpy(modified, original + matches[1].rm_so, new_len);
modified[new_len] = '\0';
- return _gf_true;
+ return new_len + 1; /* +1 for the terminating NULL */
}
}
}
/* This is guaranteed safe because of how the dest was allocated. */
strcpy(modified, original);
- return _gf_false;
+ return 0;
}
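A self-contained model of the new return convention (0 = name untouched, >0 = munged length including the NUL), using an rsync-style temp-name pattern as the example regex (the actual priv->rsync_regex may differ):

    #include <regex.h>
    #include <stdio.h>
    #include <string.h>

    /* Returns 0 if no munge happened, else strlen(modified) + 1. */
    static int
    munge_name(const char *orig, char *out, size_t outlen, regex_t *re)
    {
        regmatch_t m[2];

        if (regexec(re, orig, 2, m, 0) == 0 && m[1].rm_so != -1) {
            size_t n = m[1].rm_eo - m[1].rm_so;

            if (n + 1 <= outlen) {
                memcpy(out, orig + m[1].rm_so, n);
                out[n] = '\0';
                return (int)(n + 1);
            }
        }
        strcpy(out, orig); /* safe iff outlen >= strlen(orig) + 1 */
        return 0;
    }

    int
    main(void)
    {
        regex_t re;
        char buf[64];
        int len;

        /* rsync temporaries look like ".name.XXXXXX"; group 1 is "name". */
        regcomp(&re, "^\\.(.+)\\.[^.]+$", REG_EXTENDED);
        len = munge_name(".data.Gk2f1x", buf, sizeof(buf), &re);
        printf("%s %d\n", buf, len); /* prints "data 5" */
        regfree(&re);
        return 0;
    }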
int
@@ -70,10 +74,13 @@ dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p)
char *rsync_friendly_name = NULL;
dht_conf_t *priv = NULL;
size_t len = 0;
- gf_boolean_t munged = _gf_false;
+ int munged = 0;
priv = this->private;
+ if (name == NULL)
+ return -1;
+
len = strlen(name) + 1;
rsync_friendly_name = alloca(len);
@@ -88,19 +95,16 @@ dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p)
gf_msg_trace(this->name, 0, "trying regex for %s", name);
munged = dht_munge_name(name, rsync_friendly_name, len,
&priv->rsync_regex);
- if (munged) {
- UNLOCK(&priv->lock);
- gf_msg_debug(this->name, 0, "munged down to %s",
- rsync_friendly_name);
- goto post_unlock;
- }
}
}
UNLOCK(&priv->lock);
-post_unlock:
- if (!munged) {
+ if (munged) {
+ gf_msg_debug(this->name, 0, "munged down to %s", rsync_friendly_name);
+ len = munged;
+ } else {
rsync_friendly_name = (char *)name;
}
- return dht_hash_compute_internal(type, rsync_friendly_name, hash_p);
+ return dht_hash_compute_internal(type, rsync_friendly_name, len - 1,
+ hash_p);
}
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 4c57e0d2efc..3f2fe43d5f3 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -8,10 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/glusterfs.h>
-#include <glusterfs/xlator.h>
#include "dht-common.h"
#include "dht-lock.h"
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
static void
dht_free_fd_ctx(dht_fd_ctx_t *fd_ctx)
@@ -65,8 +64,8 @@ __dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst)
ret = __fd_ctx_set(fd, this, value);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FD_CTX_SET_FAILED,
- "Failed to set fd ctx in fd=0x%p", fd);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FD_CTX_SET_FAILED,
+ "fd=0x%p", fd, NULL);
GF_REF_PUT(fd_ctx);
}
out:
@@ -97,8 +96,8 @@ dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst)
/* Overwrite and hope for the best*/
fd_ctx->opened_on_dst = (uint64_t)(uintptr_t)dst;
UNLOCK(&fd->lock);
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_VALUE,
- "Different dst found in the fd ctx");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_VALUE,
+ NULL);
goto out;
}
@@ -366,10 +365,27 @@ dht_check_and_open_fd_on_subvol_complete(int ret, call_frame_t *frame,
break;
+ case GF_FOP_FXATTROP:
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol,
+ subvol->fops->fxattrop, local->fd,
+ local->rebalance.flags, local->rebalance.xattr,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FGETXATTR:
+ STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->fgetxattr,
+ local->fd, local->key, NULL);
+ break;
+
+ case GF_FOP_FINODELK:
+ STACK_WIND(frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
+ local->key, local->fd, local->rebalance.lock_cmd,
+ &local->rebalance.flock, local->xattr_req);
+ break;
default:
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP,
- "Unknown FOP on fd (%p) on file %s @ %s", fd,
- uuid_utoa(fd->inode->gfid), subvol->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, "fd=%p",
+ fd, "gfid=%s", uuid_utoa(fd->inode->gfid), "name=%s",
+ subvol->name, NULL);
break;
}
@@ -429,10 +445,22 @@ handle_err:
DHT_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL);
break;
+ case GF_FOP_FXATTROP:
+ DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
+ break;
+
+ case GF_FOP_FGETXATTR:
+ DHT_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL);
+ break;
+
+ case GF_FOP_FINODELK:
+ DHT_STACK_UNWIND(finodelk, frame, -1, op_errno, NULL);
+ break;
+
default:
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP,
- "Unknown FOP on fd (%p) on file %s @ %s", fd,
- uuid_utoa(fd->inode->gfid), subvol->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, "fd=%p",
+ fd, "gfid=%s", uuid_utoa(fd->inode->gfid), "name=%s",
+ subvol->name, NULL);
break;
}
@@ -485,10 +513,9 @@ dht_check_and_open_fd_on_subvol_task(void *data)
fd, NULL, NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_OPEN_FD_ON_DST_FAILED,
- "Failed to open the fd"
- " (%p, flags=0%o) on file %s @ %s",
- fd, fd->flags, uuid_utoa(fd->inode->gfid), subvol->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_OPEN_FD_ON_DST_FAILED,
+ "fd=%p", fd, "flags=0%o", fd->flags, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), "name=%s", subvol->name, NULL);
/* This can happen if the cached subvol was updated in the
* inode_ctx and the fd was opened on the new cached subvol
* after this fop was wound on the old cached subvol.
@@ -534,10 +561,8 @@ dht_check_and_open_fd_on_subvol(xlator_t *this, call_frame_t *frame)
dht_check_and_open_fd_on_subvol_complete, frame, frame);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, 0,
- "Failed to create synctask"
- " to check and open fd=%p",
- local->fd);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SYNCTASK_CREATE_FAILED,
+ "to-check-and-open fd=%p", local->fd, NULL);
}
return ret;
@@ -646,9 +671,7 @@ dht_get_subvol_from_id(xlator_t *this, int client_id)
ret = gf_asprintf(&sid, "%d", client_id);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_ASPRINTF_FAILED,
- "asprintf failed while "
- "fetching subvol from the id");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_ASPRINTF_FAILED, NULL);
goto out;
}
@@ -1261,6 +1284,7 @@ dht_migration_complete_check_task(void *data)
fd_t *tmp = NULL;
uint64_t tmp_miginfo = 0;
dht_migrate_info_t *miginfo = NULL;
+ gf_boolean_t skip_open = _gf_false;
int open_failed = 0;
this = THIS;
@@ -1307,9 +1331,9 @@ dht_migration_complete_check_task(void *data)
* migrated by two different layers. Raise
* a warning here.
*/
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO,
- "%s: Found miginfo in the inode ctx",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid));
+ gf_smsg(
+ this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), NULL);
miginfo = (void *)(uintptr_t)tmp_miginfo;
GF_REF_PUT(miginfo);
@@ -1330,10 +1354,9 @@ dht_migration_complete_check_task(void *data)
ret = syncop_lookup(this, &tmp_loc, &stbuf, 0, 0, 0);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
- "%s: failed to lookup the file on %s",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
- this->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
+ "tmp=%s", tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", this->name, NULL);
local->op_errno = -ret;
ret = -1;
goto out;
@@ -1341,18 +1364,15 @@ dht_migration_complete_check_task(void *data)
dst_node = dht_subvol_get_cached(this, tmp_loc.inode);
if (linkto_target && dst_node != linkto_target) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_LINKFILE,
- "linkto target (%s) is "
- "different from cached-subvol (%s). Treating %s as "
- "destination subvol",
- linkto_target->name, dst_node->name, dst_node->name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_LINKFILE,
+ "linkto_target_name=%s", linkto_target->name, "dst_name=%s",
+ dst_node->name, NULL);
}
if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH,
- "%s: gfid different on the target file on %s",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
- dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "dst_name=%s", dst_node->name, NULL);
ret = -1;
local->op_errno = EIO;
goto out;
@@ -1399,24 +1419,34 @@ dht_migration_complete_check_task(void *data)
* the loop will cause the destruction of the fd. So we need to
* iterate the list safely because iter_fd cannot be trusted.
*/
- list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list)
- {
- if (fd_is_anonymous(iter_fd))
- continue;
-
- if (dht_fd_open_on_dst(this, iter_fd, dst_node))
- continue;
-
+ iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
+ while (&iter_fd->inode_list != (&inode->fd_list)) {
+ if (fd_is_anonymous(iter_fd) ||
+ (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
+ if (!tmp) {
+ iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
+ inode_list);
+ continue;
+ }
+ skip_open = _gf_true;
+ }
/* We need to release the inode->lock before calling
* syncop_open() to avoid possible deadlocks. However this
* can cause the iter_fd to be released by other threads.
* To avoid this, we take a reference before releasing the
* lock.
*/
- __fd_ref(iter_fd);
+ fd_ref(iter_fd);
UNLOCK(&inode->lock);
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
+ if (skip_open)
+ goto next;
+
/* flags for open are stripped down to allow following the
* new location of the file, otherwise we can get EEXIST or
* truncate the file again as rebalance is moving the data */
@@ -1424,12 +1454,10 @@ dht_migration_complete_check_task(void *data)
(iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)),
iter_fd, NULL, NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_OPEN_FD_ON_DST_FAILED,
- "failed"
- " to open the fd"
- " (%p, flags=0%o) on file %s @ %s",
- iter_fd, iter_fd->flags, path, dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_OPEN_FD_ON_DST_FAILED, "id=%p", iter_fd,
+ "flags=0%o", iter_fd->flags, "path=%s", path, "name=%s",
+ dst_node->name, NULL);
open_failed = 1;
local->op_errno = -ret;
@@ -1438,9 +1466,11 @@ dht_migration_complete_check_task(void *data)
dht_fd_ctx_set(this, iter_fd, dst_node);
}
- fd_unref(iter_fd);
-
+ next:
LOCK(&inode->lock);
+ skip_open = _gf_false;
+ tmp = iter_fd;
+ iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
}
SYNCTASK_SETID(frame->root->uid, frame->root->gid);
@@ -1453,6 +1483,10 @@ dht_migration_complete_check_task(void *data)
unlock:
UNLOCK(&inode->lock);
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
out:
if (dict) {
@@ -1534,6 +1568,7 @@ dht_rebalance_inprogress_task(void *data)
int open_failed = 0;
uint64_t tmp_miginfo = 0;
dht_migrate_info_t *miginfo = NULL;
+ gf_boolean_t skip_open = _gf_false;
this = THIS;
frame = data;
@@ -1576,9 +1611,9 @@ dht_rebalance_inprogress_task(void *data)
* migrated by two different layers. Raise
* a warning here.
*/
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO,
- "%s: Found miginfo in the inode ctx",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid));
+ gf_smsg(
+ this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), NULL);
miginfo = (void *)(uintptr_t)tmp_miginfo;
GF_REF_PUT(miginfo);
}
@@ -1587,17 +1622,16 @@ dht_rebalance_inprogress_task(void *data)
}
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_GET_XATTR_FAILED,
- "%s: failed to get the 'linkto' xattr", local->loc.path);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_GET_XATTR_FAILED,
+ "path=%s", local->loc.path, NULL);
ret = -1;
goto out;
}
dst_node = dht_linkfile_subvol(this, NULL, NULL, dict);
if (!dst_node) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_NOT_FOUND,
- "%s: failed to get the 'linkto' xattr from dict",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GET_XATTR_FAILED,
+ "path=%s", local->loc.path, NULL);
ret = -1;
goto out;
}
@@ -1614,20 +1648,17 @@ dht_rebalance_inprogress_task(void *data)
/* lookup on dst */
ret = syncop_lookup(dst_node, &tmp_loc, &stbuf, NULL, NULL, NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_FILE_LOOKUP_ON_DST_FAILED,
- "%s: failed to lookup the file on %s",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
- dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
+ "tmp=%s", tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", dst_node->name, NULL);
ret = -1;
goto out;
}
if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH,
- "%s: gfid different on the target file on %s",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
- dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", dst_node->name, NULL);
ret = -1;
goto out;
}
@@ -1654,24 +1685,40 @@ dht_rebalance_inprogress_task(void *data)
* the loop will cause the destruction of the fd. So we need to
* iterate the list safely because iter_fd cannot be trusted.
*/
- list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list)
- {
- if (fd_is_anonymous(iter_fd))
- continue;
-
- if (dht_fd_open_on_dst(this, iter_fd, dst_node))
- continue;
-
+ iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
+ while (&iter_fd->inode_list != (&inode->fd_list)) {
/* We need to release the inode->lock before calling
* syncop_open() to avoid possible deadlocks. However this
* can cause the iter_fd to be released by other threads.
* To avoid this, we take a reference before releasing the
* lock.
*/
- __fd_ref(iter_fd);
+ if (fd_is_anonymous(iter_fd) ||
+ (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
+ if (!tmp) {
+ iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
+ inode_list);
+ continue;
+ }
+ skip_open = _gf_true;
+ }
+
+ /* Yes, this is ugly, but there isn't a cleaner way to do it.
+ * fd_ref() is an atomic increment, so it is not too costly. The
+ * goal is to reduce the number of inode lock/unlock cycles.
+ */
+
+ fd_ref(iter_fd);
UNLOCK(&inode->lock);
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
+ if (skip_open)
+ goto next;
+
/* flags for open are stripped down to allow following the
* new location of the file, otherwise we can get EEXIST or
* truncate the file again as rebalance is moving the data */
@@ -1679,11 +1726,10 @@ dht_rebalance_inprogress_task(void *data)
(iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)),
iter_fd, NULL, NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_OPEN_FD_ON_DST_FAILED,
- "failed to send open "
- "the fd (%p, flags=0%o) on file %s @ %s",
- iter_fd, iter_fd->flags, path, dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_OPEN_FD_ON_DST_FAILED, "fd=%p", iter_fd,
+ "flags=0%o", iter_fd->flags, "path=%s", path, "name=%s",
+ dst_node->name, NULL);
ret = -1;
open_failed = 1;
} else {
@@ -1692,9 +1738,11 @@ dht_rebalance_inprogress_task(void *data)
dht_fd_ctx_set(this, iter_fd, dst_node);
}
- fd_unref(iter_fd);
-
+ next:
LOCK(&inode->lock);
+ skip_open = _gf_false;
+ tmp = iter_fd;
+ iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
}
SYNCTASK_SETID(frame->root->uid, frame->root->gid);
@@ -1702,6 +1750,10 @@ dht_rebalance_inprogress_task(void *data)
unlock:
UNLOCK(&inode->lock);
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
if (open_failed) {
ret = -1;
goto out;
@@ -1709,9 +1761,8 @@ unlock:
ret = dht_inode_ctx_set_mig_info(this, inode, src_node, dst_node);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
- "%s: failed to set inode-ctx target file at %s", local->loc.path,
- dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "path=%s", local->loc.path, "name=%s", dst_node->name, NULL);
goto out;
}
@@ -1804,12 +1855,16 @@ dht_inode_ctx_time_update(inode_t *inode, xlator_t *this, struct iatt *stat,
time = &ctx->time;
- DHT_UPDATE_TIME(time->mtime, time->mtime_nsec, stat->ia_mtime,
- stat->ia_mtime_nsec, inode, post);
- DHT_UPDATE_TIME(time->ctime, time->ctime_nsec, stat->ia_ctime,
- stat->ia_ctime_nsec, inode, post);
- DHT_UPDATE_TIME(time->atime, time->atime_nsec, stat->ia_atime,
- stat->ia_atime_nsec, inode, post);
+ LOCK(&inode->lock);
+ {
+ DHT_UPDATE_TIME(time->mtime, time->mtime_nsec, stat->ia_mtime,
+ stat->ia_mtime_nsec, post);
+ DHT_UPDATE_TIME(time->ctime, time->ctime_nsec, stat->ia_ctime,
+ stat->ia_ctime_nsec, post);
+ DHT_UPDATE_TIME(time->atime, time->atime_nsec, stat->ia_atime,
+ stat->ia_atime_nsec, post);
+ }
+ UNLOCK(&inode->lock);
ret = dht_inode_ctx_set(inode, this, ctx);
out:
@@ -1878,9 +1933,7 @@ dht_heal_path(xlator_t *this, char *path, inode_table_t *itable)
};
char *bname = NULL;
char *save_ptr = NULL;
- uuid_t gfid = {
- 0,
- };
+ static uuid_t gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
char *tmp_path = NULL;
tmp_path = gf_strdup(path);
@@ -1888,9 +1941,6 @@ dht_heal_path(xlator_t *this, char *path, inode_table_t *itable)
goto out;
}
- memset(gfid, 0, 16);
- gfid[15] = 1;
-
gf_uuid_copy(loc.pargfid, gfid);
loc.parent = inode_ref(itable->root);
@@ -1934,10 +1984,9 @@ dht_heal_path(xlator_t *this, char *path, inode_table_t *itable)
ret = syncop_lookup(this, &loc, &iatt, NULL, NULL, NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED,
- "Healing of path %s failed on subvolume %s for "
- "directory %s",
- path, this->name, bname);
+ gf_smsg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED,
+ "path=%s", path, "subvolume=%s", this->name, "bname=%s",
+ bname, NULL);
goto out;
}
@@ -1995,10 +2044,8 @@ dht_heal_full_path(void *data)
ret = syncop_getxattr(source, &loc, &dict, GET_ANCESTRY_PATH_KEY, NULL,
NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED,
- "Failed to get path from subvol %s. Aborting "
- "directory healing.",
- source->name);
+ gf_smsg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_HEAL_ABORT,
+ "subvol=%s", source->name, NULL);
goto out;
}
@@ -2036,6 +2083,7 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data)
dht_local_t *local = NULL;
xlator_t *this = NULL;
int ret = -1;
+ int op_errno = 0;
local = heal_frame->local;
main_frame = local->main_frame;
@@ -2045,10 +2093,12 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data)
dht_set_fixed_dir_stat(&local->postparent);
if (local->need_xattr_heal) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret, DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "xattr heal failed for directory %s ", local->loc.path);
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ NULL);
+ }
}
DHT_STACK_UNWIND(lookup, main_frame, 0, 0, local->inode, &local->stbuf,
@@ -2136,8 +2186,8 @@ dht_get_lock_subvolume(xlator_t *this, struct gf_flock *lock,
if (ret) {
gf_uuid_unparse(inode->gfid, gfid);
UNLOCK(&inode->lock);
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SET_INODE_CTX_FAILED,
- "Failed to set lock_subvol in inode ctx for gfid %s", gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "lock_subvol gfid=%s", gfid, NULL);
goto post_unlock;
}
subvol = cached_subvol;
@@ -2167,8 +2217,8 @@ dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret)
inode = local->loc.inode ? local->loc.inode : local->fd->inode;
}
if (!inode) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LOCK_INODE_UNREF_FAILED,
- "Found a NULL inode. Failed to unref the inode");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LOCK_INODE_UNREF_FAILED,
+ NULL);
goto out;
}
@@ -2194,11 +2244,8 @@ dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret)
inode_unref(inode);
} else {
gf_uuid_unparse(inode->gfid, gfid);
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LOCK_INODE_UNREF_FAILED,
- "Unlock request failed for gfid %s."
- "Failed to unref the inode",
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCK_INODE_UNREF_FAILED, "gfid=%s", gfid, NULL);
goto out;
}
default:
@@ -2220,12 +2267,11 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
int luret = -1;
int luflag = -1;
int i = 0;
+ char **xattrs_to_heal;
if (!src || !dst) {
- gf_msg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DICT_SET_FAILED,
- "src or dst is NULL. Failed to set "
- " dictionary value for path %s",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DST_NULL_SET_FAILED,
+ "path=%s", local->loc.path, NULL);
return;
}
/* Check if any user xattr present in src dict and set
@@ -2236,17 +2282,18 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
and set it to dst dict, here index start from 1 because
user xattr already checked in previous statement
*/
+
+ xattrs_to_heal = get_xattrs_to_heal();
+
for (i = 1; xattrs_to_heal[i]; i++) {
keyval = dict_get(src, xattrs_to_heal[i]);
if (keyval) {
luflag = 1;
ret = dict_set(dst, xattrs_to_heal[i], keyval);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s",
- xattrs_to_heal[i], local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED, "key=%s", xattrs_to_heal[i],
+ "path=%s", local->loc.path, NULL);
keyval = NULL;
}
}
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index cacfe353272..dbb8070b0da 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -10,25 +10,25 @@
#include "dht-common.h"
-int
+static int
dht_access2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_readv2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_attr2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_open2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_flush2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_lk2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_fsync2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame,
int ret);
-int
+static int
dht_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, fd_t *fd, dict_t *xdata)
{
@@ -67,7 +67,7 @@ out:
return 0;
}
-int
+static int
dht_open2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -162,8 +162,8 @@ dht_file_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
local = frame->local;
prev = cookie;
- if ((local->fop == GF_FOP_FSTAT) && (op_ret == -1) && (op_errno == EBADF) &&
- !(local->fd_checked)) {
+ if ((local->fop == GF_FOP_FSTAT) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
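dht_check_remote_fd_failed_error() now centralizes the open-coded `(op_ret == -1) && (op_errno == EBADF) && !local->fd_checked` test that these callbacks carried individually. A plausible shape, shown only for orientation (the real helper may also cover other errno values):

    /* Illustrative only -- equivalent to the open-coded test this
     * patch replaces; dht_local_t comes from dht-common.h. */
    static int32_t
    check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno)
    {
        return (op_ret == -1 && op_errno == EBADF && !local->fd_checked);
    }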
@@ -216,7 +216,7 @@ err:
return 0;
}
-int
+static int
dht_attr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -258,7 +258,7 @@ out:
return 0;
}
-int
+static int
dht_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct iatt *stbuf, dict_t *xdata)
{
@@ -431,7 +431,7 @@ dht_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
if (local->call_cnt != 1)
goto out;
- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -473,7 +473,7 @@ out:
return 0;
}
-int
+static int
dht_readv2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -558,7 +558,7 @@ err:
return 0;
}
-int
+static int
dht_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, dict_t *xdata)
{
@@ -606,7 +606,7 @@ out:
return 0;
}
-int
+static int
dht_access2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -703,7 +703,7 @@ dht_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
if (local->call_cnt != 1)
goto out;
- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -735,7 +735,7 @@ out:
return 0;
}
-int
+static int
dht_flush2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -820,7 +820,7 @@ dht_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
local->op_errno = op_errno;
- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -881,7 +881,7 @@ out:
return 0;
}
-int
+static int
dht_fsync2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -959,7 +959,7 @@ err:
/* TODO: for 'lk()' call, we need some other special error, may be ESTALE to
indicate that lock migration happened on the fd, so we can consider it as
phase 2 of migration */
-int
+static int
dht_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct gf_flock *flock, dict_t *xdata)
{
@@ -1006,7 +1006,7 @@ out:
return 0;
}
-int
+static int
dht_lk2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -1087,7 +1087,7 @@ err:
return 0;
}
-int
+static int
dht_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct gf_lease *lease, dict_t *xdata)
{
@@ -1129,7 +1129,7 @@ err:
}
/* Symlinks are currently not migrated, so no need for any check here */
-int
+static int
dht_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, const char *path, struct iatt *stbuf,
dict_t *xdata)
@@ -1192,6 +1192,29 @@ err:
return 0;
}
+/* Read the iatt back from xdata: prefer DHT_MODE_IN_XDATA_KEY and
+ * fall back to DHT_IATT_IN_XDATA_KEY when the mode key is absent.
+ * The mode-key path returns a dummy iatt with only the mode and
+ * type set
+ */
+static int
+dht_read_iatt_from_xdata(dict_t *xdata, struct iatt *stbuf)
+{
+    int ret = -1;
+    int32_t mode = 0;
+    struct iatt *stored = NULL;
+
+    ret = dict_get_int32(xdata, DHT_MODE_IN_XDATA_KEY, &mode);
+
+    if (ret) {
+        ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stored);
+        if (!ret)
+            /* copy out: assigning the bin pointer to the pointer
+             * parameter itself would be lost on return */
+            *stbuf = *stored;
+    } else {
+        stbuf->ia_prot = ia_prot_from_st_mode(mode);
+        stbuf->ia_type = ia_type_from_st_mode(mode);
+    }
+
+    return ret;
+}
+
int
dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict,
@@ -1223,7 +1246,14 @@ dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (local->call_cnt != 1)
goto out;
- ret = dht_read_iatt_from_xdata(this, xdata, &stbuf);
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
+ ret = dht_read_iatt_from_xdata(xdata, &stbuf);
if ((!op_ret) && (ret)) {
/* This is a potential problem and can cause corruption
@@ -1275,7 +1305,7 @@ out:
return 0;
}
-int
+static int
dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame,
int ret)
{
@@ -1334,7 +1364,7 @@ out:
return 0;
}
-int
+static int
dht_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
@@ -1342,6 +1372,22 @@ dht_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
+/* Request both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY in
+ * xdata; the callback prefers DHT_MODE_IN_XDATA_KEY and falls back
+ * to DHT_IATT_IN_XDATA_KEY
+ */
+static int
+dht_request_iatt_in_xdata(dict_t *xattr_req)
+{
+    int ret1 = dict_set_int8(xattr_req, DHT_MODE_IN_XDATA_KEY, 1);
+    int ret2 = dict_set_int8(xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+
+    /* Succeed if at least one of the keys could be set */
+    return (ret1 == 0 || ret2 == 0) ? 0 : -1;
+}
+
int
dht_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
@@ -1384,7 +1430,7 @@ dht_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
local->rebalance.xattr = dict_ref(dict);
local->rebalance.flags = flags;
- ret = dht_request_iatt_in_xdata(this, local->xattr_req);
+ ret = dht_request_iatt_in_xdata(local->xattr_req);
if (ret) {
gf_msg_debug(this->name, 0,
@@ -1406,7 +1452,7 @@ err:
return 0;
}
-int
+static int
dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
@@ -1454,7 +1500,7 @@ dht_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
local->rebalance.xattr = dict_ref(dict);
local->rebalance.flags = flags;
- ret = dht_request_iatt_in_xdata(this, local->xattr_req);
+ ret = dht_request_iatt_in_xdata(local->xattr_req);
if (ret) {
gf_msg_debug(this->name, 0, "Failed to set dictionary key %s fd=%p",
@@ -1479,7 +1525,7 @@ err:
* below fops, hence not implementing 'migration' related checks
*/
-int
+static int
dht_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
@@ -1535,8 +1581,26 @@ dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ dht_local_t *local = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+
+ local = frame->local;
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
+out:
dht_lk_inode_unref(frame, op_ret);
DHT_STACK_UNWIND(finodelk, frame, op_ret, op_errno, xdata);
+
return 0;
}
@@ -1574,6 +1638,13 @@ dht_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
if (ret)
goto err;
*/
+ local->rebalance.flock = *lock;
+ local->rebalance.lock_cmd = cmd;
+ local->key = gf_strdup(volume);
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
STACK_WIND(frame, dht_finodelk_cbk, lock_subvol,
lock_subvol->fops->finodelk, volume, fd, cmd, lock, xdata);
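
A recurring change in dht-inode-read.c replaces the open-coded retry test `(op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)` with the shared predicate dht_check_remote_fd_failed_error(). A plausible shape of that helper as a hedged stand-alone sketch; the real definition lives elsewhere in DHT and may differ, e.g. in exactly which errno values it accepts (note the old ftruncate check also retried on EINVAL, which is not visible here):

#include <errno.h>

/* Minimal stand-in; the real dht_local_t is declared in dht-common.h. */
typedef struct {
    int fd_checked; /* nonzero once the single re-open attempt was made */
} dht_local_t;

/* Sketch: retry at most once when the server-side fd looks bad, which
 * happens when the file migrated and the fd was never opened on the
 * new cached subvolume. */
static int
dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno)
{
    return (op_ret == -1 && (op_errno == EBADF || op_errno == EBADFD) &&
            !local->fd_checked);
}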
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
index b26b7058d3e..2f23ce90fbd 100644
--- a/xlators/cluster/dht/src/dht-inode-write.c
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -10,17 +10,17 @@
#include "dht-common.h"
-int
+static int
dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
int
@@ -49,7 +49,7 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* We only check once as this could be a valid bad fd error.
*/
- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -93,30 +93,28 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
/* Check if the rebalance phase1 is true */
if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
- if (!dht_is_tier_xlator(this)) {
+ if (!local->xattr_req) {
+ local->xattr_req = dict_new();
if (!local->xattr_req) {
- local->xattr_req = dict_new();
- if (!local->xattr_req) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
- "insufficient memory");
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto out;
- }
- }
-
- ret = dict_set_uint32(local->xattr_req,
- GF_PROTECT_FROM_EXTERNAL_WRITES, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0,
- "Failed to set key %s in dictionary",
- GF_PROTECT_FROM_EXTERNAL_WRITES);
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
+ "insufficient memory");
local->op_errno = ENOMEM;
local->op_ret = -1;
goto out;
}
}
+ ret = dict_set_uint32(local->xattr_req, GF_PROTECT_FROM_EXTERNAL_WRITES,
+ 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0,
+ "Failed to set key %s in dictionary",
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
+ }
+
dht_iatt_merge(this, &local->stbuf, postbuf);
dht_iatt_merge(this, &local->prebuf, prebuf);
@@ -142,7 +140,7 @@ out:
return 0;
}
-int
+static int
dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -262,8 +260,8 @@ dht_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* We only check once as this could actually be a valid error.
*/
- if ((local->fop == GF_FOP_FTRUNCATE) && (op_ret == -1) &&
- ((op_errno == EBADF) || (op_errno == EINVAL)) && !(local->fd_checked)) {
+ if ((local->fop == GF_FOP_FTRUNCATE) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -336,7 +334,7 @@ err:
return 0;
}
-int
+static int
dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -489,7 +487,7 @@ dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* We only check once as this could actually be a valid error.
*/
- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -555,7 +553,7 @@ err:
return 0;
}
-int
+static int
dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -666,7 +664,7 @@ dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* and a lookup updated the cached subvol in the inode ctx.
* We only check once as this could actually be a valid error.
*/
- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -731,7 +729,7 @@ err:
return 0;
}
-int
+static int
dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -838,7 +836,7 @@ dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* and a lookup updated the cached subvol in the inode ctx.
* We only check once as this could actually be a valid error.
*/
- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -902,7 +900,7 @@ err:
return 0;
}
-int
+static int
dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -1005,8 +1003,8 @@ dht_file_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
- if ((local->fop == GF_FOP_FSETATTR) && (op_ret == -1) &&
- (op_errno == EBADF) && !(local->fd_checked)) {
+ if ((local->fop == GF_FOP_FSETATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -1049,7 +1047,7 @@ out:
return 0;
}
-int
+static int
dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
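
In dht_writev_cbk() above, the rebalance phase-1 branch loses its dht_is_tier_xlator() guard (tiering is being removed) and is un-nested: the xattr_req dict is created lazily and GF_PROTECT_FROM_EXTERNAL_WRITES is now always set, so the rebalance process can tell application writes apart from its own. A stand-alone toy with the same control flow (the toy_dict_* helpers and the literal key string are stand-ins, not the glusterfs dict API):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Toy single-entry dictionary, just enough to mirror the flow. */
typedef struct {
    char key[64];
    unsigned value;
} toy_dict_t;

static toy_dict_t *
toy_dict_new(void)
{
    return calloc(1, sizeof(toy_dict_t));
}

static int
toy_dict_set_uint32(toy_dict_t *d, const char *key, unsigned v)
{
    if (!d || strlen(key) >= sizeof(d->key))
        return -1;
    strcpy(d->key, key);
    d->value = v;
    return 0;
}

int
main(void)
{
    toy_dict_t *xattr_req = NULL;

    /* Mirrors the un-nested phase-1 path: create the request dict
     * lazily, then mark the write as protected/internal. */
    if (!xattr_req) {
        xattr_req = toy_dict_new();
        if (!xattr_req)
            return 1; /* local->op_errno = ENOMEM in the real code */
    }
    if (toy_dict_set_uint32(xattr_req, "glusterfs.protect.writes", 1))
        return 1;

    printf("%s=%u\n", xattr_req->key, xattr_req->value);
    free(xattr_req);
    return 0;
}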
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index 35bd3fbd25e..fda904c92c9 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -8,11 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/glusterfs.h>
-#include <glusterfs/xlator.h>
#include "dht-common.h"
#include <glusterfs/byte-order.h>
-#include "dht-messages.h"
#include "unittest/unittest.h"
#define layout_base_size (sizeof(dht_layout_t))
@@ -134,9 +131,8 @@ dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name)
ret = dht_hash_compute(this, layout->type, name, &hash);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMPUTE_HASH_FAILED,
- "hash computation failed for type=%d name=%s", layout->type,
- name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMPUTE_HASH_FAILED,
+ "type=%d", layout->type, "name=%s", name, NULL);
goto out;
}
@@ -148,8 +144,8 @@ dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name)
}
if (!subvol) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
- "no subvolume for hash (value) = %u", hash);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "hash-value=0x%x", hash, NULL);
}
out:
@@ -258,7 +254,7 @@ dht_disk_layout_extract_for_subvol(xlator_t *this, dht_layout_t *layout,
return dht_disk_layout_extract(this, layout, i, disk_layout_p);
}
-int
+static int
dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
void *disk_layout_raw, int disk_layout_len)
{
@@ -269,8 +265,8 @@ dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
int disk_layout[4];
if (!disk_layout_raw) {
- gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
- "error no layout on disk for merge");
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ NULL);
return -1;
}
@@ -287,10 +283,8 @@ dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
case DHT_HASH_TYPE_DM:
break;
default:
- gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_DISK_LAYOUT,
- "Invalid disk layout: "
- "Catastrophic error layout with unknown type found %d",
- disk_layout[1]);
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_DISK_LAYOUT,
+ "layout=%d", disk_layout[1], NULL);
return -1;
}
@@ -302,9 +296,10 @@ dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
layout->list[pos].start = start_off;
layout->list[pos].stop = stop_off;
- gf_msg_trace(
- this->name, 0, "merged to layout: %u - %u (type %d, hash %d) from %s",
- start_off, stop_off, commit_hash, type, layout->list[pos].xlator->name);
+ gf_msg_trace(this->name, 0,
+ "merged to layout: 0x%x - 0x%x (hash 0x%x, type %d) from %s",
+ start_off, stop_off, commit_hash, type,
+ layout->list[pos].xlator->name);
return 0;
}
@@ -357,8 +352,8 @@ dht_layout_merge(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
ret = dht_disk_layout_merge(this, layout, i, disk_layout_raw,
disk_layout_len);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
- "layout merge from subvolume %s failed", subvol->name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ "subvolume=%s", subvol->name, NULL);
goto out;
}
@@ -417,8 +412,7 @@ dht_layout_range_swap(dht_layout_t *layout, int i, int j)
layout->list[j].start = start_swap;
layout->list[j].stop = stop_swap;
}
-
-int64_t
+static int64_t
dht_layout_entry_cmp_volname(dht_layout_t *layout, int i, int j)
{
return (strcmp(layout->list[i].xlator->name, layout->list[j].xlator->name));
@@ -441,7 +435,7 @@ dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator)
return _gf_false;
}
-int64_t
+static int64_t
dht_layout_entry_cmp(dht_layout_t *layout, int i, int j)
{
int64_t diff = 0;
@@ -477,7 +471,7 @@ dht_layout_sort(dht_layout_t *layout)
return 0;
}
-int
+void
dht_layout_sort_volname(dht_layout_t *layout)
{
int i = 0;
@@ -493,8 +487,6 @@ dht_layout_sort_volname(dht_layout_t *layout)
dht_layout_entry_swap(layout, i, j);
}
}
-
- return 0;
}
void
@@ -627,8 +619,8 @@ dht_layout_normalize(xlator_t *this, loc_t *loc, dht_layout_t *layout)
ret = dht_layout_sort(layout);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED,
- "sort failed?! how the ....");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED,
+ NULL);
goto out;
}
@@ -644,10 +636,9 @@ dht_layout_normalize(xlator_t *this, loc_t *loc, dht_layout_t *layout)
" gfid = %s",
loc->path, gfid);
} else {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_ANOMALIES_INFO,
- "Found anomalies in %s (gfid = %s). "
- "Holes=%d overlaps=%d",
- loc->path, gfid, holes, overlaps);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_ANOMALIES_INFO,
+ "path=%s", loc->path, "gfid=%s", gfid, "holes=%d", holes,
+ "overlaps=%d", overlaps, NULL);
}
ret = -1;
}
@@ -714,12 +705,11 @@ dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
if (!xattr) {
if (err == 0) {
if (loc) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DICT_GET_FAILED,
- "%s: xattr dictionary is NULL", loc->path);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_XATTR_DICT_NULL,
+ "path=%s", loc->path, NULL);
} else {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DICT_GET_FAILED,
- "path not found: "
- "xattr dictionary is NULL");
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_XATTR_DICT_NULL,
+ "path not found", NULL);
}
ret = -1;
}
@@ -731,13 +721,13 @@ dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
if (dict_ret < 0) {
if (err == 0 && layout->list[pos].stop) {
if (loc) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
- "%s: Disk layout missing, gfid = %s", loc->path, gfid);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
} else {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
- "path not found: "
- "Disk layout missing, gfid = %s",
- gfid);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
+ "path not found"
+ "gfid=%s",
+ gfid, NULL);
}
ret = -1;
}
@@ -753,13 +743,13 @@ dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
if ((layout->list[pos].start != start_off) ||
(layout->list[pos].stop != stop_off) ||
(layout->list[pos].commit_hash != commit_hash)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_INFO,
- "subvol: %s; inode layout - %" PRIu32 " - %" PRIu32 " - %" PRIu32
- "; "
- "disk layout - %" PRIu32 " - %" PRIu32 " - %" PRIu32,
- layout->list[pos].xlator->name, layout->list[pos].start,
- layout->list[pos].stop, layout->list[pos].commit_hash, start_off,
- stop_off, commit_hash);
+        gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_INFO, "subvol=%s",
+                layout->list[pos].xlator->name, "inode-layout:start=0x%x",
+                layout->list[pos].start, "inode-layout:stop=0x%x",
+                layout->list[pos].stop, "inode-layout:commit-hash=0x%x",
+                layout->list[pos].commit_hash, "disk-layout:start-off=0x%x",
+                start_off, "disk-layout:stop-off=0x%x", stop_off,
+                "disk-layout:commit-hash=0x%x", commit_hash, NULL);
ret = 1;
} else {
ret = 0;
@@ -781,9 +771,8 @@ dht_layout_preset(xlator_t *this, xlator_t *subvol, inode_t *inode)
layout = dht_layout_for_subvol(this, subvol);
if (!layout) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_NO_LAYOUT_INFO,
- "no pre-set layout for subvolume %s",
- subvol ? subvol->name : "<nil>");
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_NO_LAYOUT_INFO,
+ "subvolume=%s", subvol ? subvol->name : "<nil>", NULL);
ret = -1;
goto out;
}
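
The dht-layout.c log conversions also switch layout ranges and hashes from decimal (%u) to hex (0x%x), the natural radix for ranges that partition the 32-bit hash space. A tiny stand-alone illustration with made-up values:

#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
    /* Two equal chunks of the 32-bit hash space, as a layout might
     * assign them to two subvolumes. */
    uint32_t start[2] = {0x00000000, 0x80000000};
    uint32_t stop[2] = {0x7fffffff, 0xffffffff};

    for (int i = 0; i < 2; i++)
        printf("subvol-%d: 0x%08" PRIx32 " - 0x%08" PRIx32 "\n", i, start[i],
               stop[i]);
    return 0;
}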
diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c
index e17c354bea6..89ec6cca56e 100644
--- a/xlators/cluster/dht/src/dht-linkfile.c
+++ b/xlators/cluster/dht/src/dht-linkfile.c
@@ -8,13 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/glusterfs.h>
-#include <glusterfs/xlator.h>
#include <glusterfs/compat.h>
#include "dht-common.h"
-#include "dht-messages.h"
-int
+static int
dht_linkfile_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, dict_t *xattr,
@@ -37,17 +34,16 @@ dht_linkfile_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
if (!is_linkfile)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR,
- "got non-linkfile %s:%s, gfid = %s", prev->name, local->loc.path,
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR,
+ "name=%s", prev->name, "path=%s", local->loc.path, "gfid=%s",
+ gfid, NULL);
out:
local->linkfile.linkfile_cbk(frame, cookie, this, op_ret, op_errno, inode,
stbuf, postparent, postparent, xattr);
return 0;
}
-#define is_equal(a, b) ((a) == (b))
-int
+static int
dht_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
@@ -76,9 +72,8 @@ dht_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
ret = dict_set_uint32(xattrs, conf->link_xattr_name, 256);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value. key : %s",
- conf->link_xattr_name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "mame=%s", conf->link_xattr_name, NULL);
goto out;
}
@@ -128,27 +123,23 @@ dht_linkfile_create(call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true);
if (ret)
- gf_msg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: "
- "key = gfid-req, gfid = %s ",
- loc->path, gfid);
+ gf_smsg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
} else {
gf_uuid_unparse(loc->gfid, gfid);
}
ret = dict_set_str(dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
if (ret)
- gf_msg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: key = %s,"
- " gfid = %s",
- loc->path, GLUSTERFS_INTERNAL_FOP_KEY, gfid);
+ gf_smsg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", GLUSTERFS_INTERNAL_FOP_KEY,
+ "gfid=%s", gfid, NULL);
ret = dict_set_str(dict, conf->link_xattr_name, tovol->name);
if (ret < 0) {
- gf_msg(frame->this->name, GF_LOG_INFO, 0, DHT_MSG_CREATE_LINK_FAILED,
- "%s: failed to initialize linkfile data, gfid = %s", loc->path,
- gfid);
+ gf_smsg(frame->this->name, GF_LOG_INFO, 0, DHT_MSG_CREATE_LINK_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
goto out;
}
@@ -189,10 +180,9 @@ dht_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_UNLINK_FAILED,
- "Unlinking linkfile %s (gfid = %s)on "
- "subvolume %s failed ",
- local->loc.path, gfid, subvol->name);
+ gf_smsg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_UNLINK_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", gfid, "subvolume=%s",
+ subvol->name, NULL);
}
DHT_STACK_DESTROY(frame);
@@ -260,7 +250,7 @@ out:
return subvol;
}
-int
+static int
dht_linkfile_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *statpre,
struct iatt *statpost, dict_t *xdata)
@@ -272,10 +262,9 @@ dht_linkfile_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
loc = &local->loc;
if (op_ret)
- gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_SETATTR_FAILED,
- "Failed to set attr uid/gid on %s"
- " :<gfid:%s> ",
- (loc->path ? loc->path : "NULL"), uuid_utoa(local->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_SETATTR_FAILED,
+ "path=%s", (loc->path ? loc->path : "NULL"), "gfid=%s",
+ uuid_utoa(local->gfid), NULL);
DHT_STACK_DESTROY(frame);
diff --git a/xlators/cluster/dht/src/dht-lock.c b/xlators/cluster/dht/src/dht-lock.c
index f9bac4f97c8..638821ccee5 100644
--- a/xlators/cluster/dht/src/dht-lock.c
+++ b/xlators/cluster/dht/src/dht-lock.c
@@ -44,7 +44,8 @@ dht_log_lk_array(char *name, gf_loglevel_t log_level, dht_lock_t **lk_array,
if (!lk_buf)
goto out;
- gf_msg(name, log_level, 0, DHT_MSG_LK_ARRAY_INFO, "%d. %s", i, lk_buf);
+ gf_smsg(name, log_level, 0, DHT_MSG_LK_ARRAY_INFO, "index=%d", i,
+ "lk_buf=%s", lk_buf, NULL);
GF_FREE(lk_buf);
}
@@ -313,11 +314,9 @@ dht_unlock_entrylk_done(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
if (op_ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, op_errno,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "unlock failed on gfid: %s, stale lock might be left "
- "in DHT_LAYOUT_HEAL_DOMAIN",
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UNLOCK_GFID_FAILED, "gfid=%s", gfid,
+ "DHT_LAYOUT_HEAL_DOMAIN", NULL);
}
DHT_STACK_DESTROY(frame);
@@ -339,9 +338,10 @@ dht_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
uuid_utoa_r(local->lock[0].ns.directory_ns.locks[lk_index]->loc.gfid, gfid);
if (op_ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
- "unlocking failed on %s:%s",
- local->lock[0].ns.directory_ns.locks[lk_index]->xl->name, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
+ "name=%s",
+ local->lock[0].ns.directory_ns.locks[lk_index]->xl->name,
+ "gfid=%s", gfid, NULL);
} else {
local->lock[0].ns.directory_ns.locks[lk_index]->locked = 0;
}
@@ -375,9 +375,9 @@ dht_unlock_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
lock_frame = dht_lock_frame(frame);
if (lock_frame == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
- "cannot allocate a frame, not unlocking following "
- "entrylks:");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ NULL);
dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
goto done;
@@ -385,9 +385,9 @@ dht_unlock_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
ret = dht_local_entrylk_init(lock_frame, lk_array, lk_count, entrylk_cbk);
if (ret < 0) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
- "storing locks in local failed, not unlocking "
- "following entrylks:");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ NULL);
dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
@@ -446,21 +446,17 @@ dht_unlock_entrylk_wrapper(call_frame_t *frame, dht_elock_wrap_t *entrylk)
lock_frame = copy_frame(frame);
if (lock_frame == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "mkdir (%s/%s) (path: %s): "
- "copy frame failed",
- pgfid, local->loc.name, local->loc.path);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_COPY_FRAME_FAILED, "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
goto done;
}
lock_local = dht_local_init(lock_frame, NULL, NULL, 0);
if (lock_local == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "mkdir (%s/%s) (path: %s): "
- "local creation failed",
- pgfid, local->loc.name, local->loc.path);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_CREATE_FAILED, "local", "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
goto done;
}
@@ -700,9 +696,10 @@ dht_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
uuid_utoa_r(local->lock[0].layout.my_layout.locks[lk_index]->loc.gfid,
gfid);
- gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
- "unlocking failed on %s:%s",
- local->lock[0].layout.my_layout.locks[lk_index]->xl->name, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
+ "name=%s",
+ local->lock[0].layout.my_layout.locks[lk_index]->xl->name,
+ "gfid=%s", gfid, NULL);
} else {
local->lock[0].layout.my_layout.locks[lk_index]->locked = 0;
}
@@ -727,11 +724,9 @@ dht_unlock_inodelk_done(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
if (op_ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, op_errno,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "unlock failed on gfid: %s, stale lock might be left "
- "in DHT_LAYOUT_HEAL_DOMAIN",
- gfid);
+        gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+                DHT_MSG_UNLOCK_GFID_FAILED, "gfid=%s", gfid,
+                "DHT_LAYOUT_HEAL_DOMAIN", NULL);
}
DHT_STACK_DESTROY(frame);
@@ -762,9 +757,9 @@ dht_unlock_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
lock_frame = dht_lock_frame(frame);
if (lock_frame == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
- "cannot allocate a frame, not unlocking following "
- "locks:");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ NULL);
dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
goto done;
@@ -772,9 +767,9 @@ dht_unlock_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk);
if (ret < 0) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
- "storing locks in local failed, not unlocking "
- "following locks:");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ NULL);
dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
@@ -834,21 +829,17 @@ dht_unlock_inodelk_wrapper(call_frame_t *frame, dht_ilock_wrap_t *inodelk)
lock_frame = copy_frame(frame);
if (lock_frame == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "mkdir (%s/%s) (path: %s): "
- "copy frame failed",
- pgfid, local->loc.name, local->loc.path);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_COPY_FRAME_FAILED, "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
goto done;
}
lock_local = dht_local_init(lock_frame, NULL, NULL, 0);
if (lock_local == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "mkdir (%s/%s) (path: %s): "
- "local creation failed",
- pgfid, local->loc.name, local->loc.path);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_CREATE_FAILED, "local", "gfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
goto done;
}
@@ -1039,13 +1030,12 @@ dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
local->lock[0].layout.my_layout.op_ret = -1;
local->lock[0].layout.my_layout.op_errno = op_errno;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_INODELK_FAILED,
- "inodelk failed on subvol %s. gfid:%s",
- local->lock[0]
- .layout.my_layout.locks[lk_index]
- ->xl->name,
- gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_INODELK_FAILED, "subvol=%s",
+ local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->xl->name,
+ "gfid=%s", gfid, NULL);
goto cleanup;
}
break;
@@ -1060,13 +1050,12 @@ dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
local->lock[0].layout.my_layout.op_ret = -1;
local->lock[0].layout.my_layout.op_errno = op_errno;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_INODELK_FAILED,
- "inodelk failed on subvol %s. gfid:%s",
- local->lock[0]
- .layout.my_layout.locks[lk_index]
- ->xl->name,
- gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_INODELK_FAILED, "subvol=%s",
+ local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->xl->name,
+ "gfid=%s", gfid, NULL);
goto cleanup;
}
break;
@@ -1077,11 +1066,11 @@ dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
local->lock[0].layout.my_layout.op_ret = -1;
local->lock[0].layout.my_layout.op_errno = op_errno;
- gf_msg(
+ gf_smsg(
this->name, GF_LOG_ERROR, op_errno, DHT_MSG_INODELK_FAILED,
- "inodelk failed on subvol %s, gfid:%s",
+ "subvol=%s",
local->lock[0].layout.my_layout.locks[lk_index]->xl->name,
- gfid);
+ "gfid=%s", gfid, NULL);
goto cleanup;
}
}
@@ -1153,19 +1142,16 @@ dht_blocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
lock_frame = dht_lock_frame(frame);
if (lock_frame == NULL) {
gf_uuid_unparse(tmp_local->loc.gfid, gfid);
- gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCK_FRAME_FAILED,
- "memory allocation failed for lock_frame. gfid:%s"
- " path:%s",
- gfid, tmp_local->loc.path);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCK_FRAME_FAILED,
+ "gfid=%s", gfid, "path=%s", tmp_local->loc.path, NULL);
goto out;
}
ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk);
if (ret < 0) {
gf_uuid_unparse(tmp_local->loc.gfid, gfid);
- gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCAL_LOCK_INIT_FAILED,
- "dht_local_lock_init failed, gfid: %s path:%s", gfid,
- tmp_local->loc.path);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCAL_LOCK_INIT_FAILED,
+ "gfid=%s", gfid, "path=%s", tmp_local->loc.path, NULL);
goto out;
}
@@ -1246,11 +1232,10 @@ dht_blocking_entrylk_after_inodelk(call_frame_t *frame, void *cookie,
if (ret < 0) {
local->op_ret = -1;
local->op_errno = EIO;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
- DHT_MSG_ENTRYLK_ERROR,
- "%s (%s/%s): "
- "dht_blocking_entrylk failed after taking inodelk",
- gf_fop_list[local->fop], pgfid, entrylk->locks[0]->basename);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_ENTRYLK_FAILED_AFT_INODELK, "fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "basename=%s",
+ entrylk->locks[0]->basename, NULL);
goto err;
}
@@ -1310,10 +1295,9 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
ret = dht_build_parent_loc(this, &parent, loc, &op_errno);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_LOC_FAILED,
- "gfid:%s (name:%s) (path: %s): "
- "parent loc build failed",
- loc->gfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_LOC_FAILED,
+ "gfid=%s", loc->gfid, "name=%s", loc->name, "path=%s",
+ loc->path, NULL);
goto out;
}
gf_uuid_unparse(parent.gfid, pgfid);
@@ -1322,10 +1306,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
inodelk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
if (inodelk->locks == NULL) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY,
- "%s (%s/%s) (path: %s): "
- "calloc failure",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_CALLOC_FAILED, "fop=%s", gf_fop_list[local->fop],
+ "pgfid=%s", pgfid, "name=%s", loc->name, "path=%s", loc->path,
+ NULL);
goto out;
}
@@ -1334,10 +1318,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
FAIL_ON_ANY_ERROR);
if (inodelk->locks[0] == NULL) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY,
- "%s (%s/%s) (path: %s): "
- "inodelk: lock allocation failed",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_LOCK_ALLOC_FAILED, "inodelk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
goto err;
}
inodelk->lk_count = count;
@@ -1346,10 +1330,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
entrylk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
if (entrylk->locks == NULL) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY,
- "%s (%s/%s) (path: %s): "
- "entrylk: calloc failure",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_CALLOC_FAILED, "entrylk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
goto err;
}
@@ -1359,10 +1343,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
FAIL_ON_ANY_ERROR);
if (entrylk->locks[0] == NULL) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY,
- "%s (%s/%s) (path: %s): "
- "entrylk: lock allocation failed",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_LOCK_ALLOC_FAILED, "entrylk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
goto err;
}
@@ -1376,11 +1360,11 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
dht_blocking_entrylk_after_inodelk);
if (ret < 0) {
local->op_errno = EIO;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
- DHT_MSG_INODELK_ERROR,
- "%s (%s/%s) (path: %s): "
- "dht_blocking_inodelk failed",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_BLOCK_INODELK_FAILED, "fop=%s", gf_fop_list[local->fop],
+ "pgfid=%s", pgfid, "name=%s", loc->name, "path=%s", loc->path,
+ NULL);
+
goto err;
}
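
The dht-lock.c hunks also retire the overloaded DHT_MSG_PARENT_LAYOUT_CHANGED id in favor of purpose-specific ids (DHT_MSG_COPY_FRAME_FAILED, DHT_MSG_CREATE_FAILED, DHT_MSG_UNLOCK_GFID_FAILED, ...), so each failure mode can be grepped and documented on its own. A sketch of why distinct ids matter for log tooling (the toy ids and lookup are illustrative, not the glusterfs registry):

#include <stdio.h>
#include <string.h>

/* Toy id-to-text table; the real mapping comes from GLFS_MSGID plus
 * the *_STR macros in dht-messages.h. */
static const char *toy_msgs[][2] = {
    {"DHT_MSG_COPY_FRAME_FAILED", "copy frame failed"},
    {"DHT_MSG_UNLOCK_GFID_FAILED", "unlock failed on gfid: stale lock might be left"},
};

int
main(void)
{
    const char *wanted = "DHT_MSG_UNLOCK_GFID_FAILED";
    for (size_t i = 0; i < sizeof(toy_msgs) / sizeof(toy_msgs[0]); i++)
        if (strcmp(toy_msgs[i][0], wanted) == 0)
            printf("%s => %s\n", toy_msgs[i][0], toy_msgs[i][1]);
    return 0;
}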
diff --git a/xlators/cluster/dht/src/dht-lock.h b/xlators/cluster/dht/src/dht-lock.h
index 1cea8ae51ea..6485c03fb6e 100644
--- a/xlators/cluster/dht/src/dht-lock.h
+++ b/xlators/cluster/dht/src/dht-lock.h
@@ -11,7 +11,6 @@
#ifndef _DHT_LOCK_H
#define _DHT_LOCK_H
-#include <glusterfs/xlator.h>
#include "dht-common.h"
void
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
index 33f9832395b..e3c4471334a 100644
--- a/xlators/cluster/dht/src/dht-mem-types.h
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -30,10 +30,7 @@ enum gf_dht_mem_types_ {
gf_dht_mt_container_t,
gf_dht_mt_octx_t,
gf_dht_mt_miginfo_t,
- gf_tier_mt_bricklist_t,
- gf_tier_mt_ipc_ctr_params_t,
gf_dht_mt_fd_ctx_t,
- gf_tier_mt_qfile_array_t,
gf_dht_ret_cache_t,
gf_dht_nodeuuids_t,
gf_dht_mt_end
diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h
index afc7460151b..601f8dad78b 100644
--- a/xlators/cluster/dht/src/dht-messages.h
+++ b/xlators/cluster/dht/src/dht-messages.h
@@ -38,12 +38,11 @@ GLFS_MSGID(
DHT_MSG_REBALANCE_STATUS, DHT_MSG_REBALANCE_STOPPED, DHT_MSG_RENAME_FAILED,
DHT_MSG_SETATTR_FAILED, DHT_MSG_SUBVOL_INSUFF_INODES,
DHT_MSG_SUBVOL_INSUFF_SPACE, DHT_MSG_UNLINK_FAILED,
- DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT, DHT_MSG_LOG_TIER_ERROR,
- DHT_MSG_LOG_TIER_STATUS, DHT_MSG_GET_XATTR_FAILED,
- DHT_MSG_FILE_LOOKUP_FAILED, DHT_MSG_OPEN_FD_FAILED,
- DHT_MSG_SET_INODE_CTX_FAILED, DHT_MSG_UNLOCKING_FAILED,
- DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO, DHT_MSG_CHUNK_SIZE_INFO,
- DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR,
+ DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT,
+ DHT_MSG_GET_XATTR_FAILED, DHT_MSG_FILE_LOOKUP_FAILED,
+ DHT_MSG_OPEN_FD_FAILED, DHT_MSG_SET_INODE_CTX_FAILED,
+ DHT_MSG_UNLOCKING_FAILED, DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO,
+ DHT_MSG_CHUNK_SIZE_INFO, DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR,
DHT_MSG_LAYOUT_SORT_FAILED, DHT_MSG_REGEX_INFO, DHT_MSG_FOPEN_FAILED,
DHT_MSG_SET_HOSTNAME_FAILED, DHT_MSG_BRICK_ERROR, DHT_MSG_SYNCOP_FAILED,
DHT_MSG_MIGRATE_INFO, DHT_MSG_SOCKET_ERROR, DHT_MSG_CREATE_FD_FAILED,
@@ -69,8 +68,7 @@ GLFS_MSGID(
DHT_MSG_INIT_LOCAL_SUBVOL_FAILED, DHT_MSG_SYS_CALL_GET_TIME_FAILED,
DHT_MSG_NO_DISK_USAGE_STATUS, DHT_MSG_SUBVOL_DOWN_ERROR,
DHT_MSG_REBAL_THROTTLE_INFO, DHT_MSG_COMMIT_HASH_INFO,
- DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_LOG_IPC_TIER_ERROR,
- DHT_MSG_TIER_PAUSED, DHT_MSG_TIER_RESUME, DHT_MSG_SETTLE_HASH_FAILED,
+ DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_SETTLE_HASH_FAILED,
DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, DHT_MSG_FD_CTX_SET_FAILED,
DHT_MSG_STALE_LOOKUP, DHT_MSG_PARENT_LAYOUT_CHANGED,
DHT_MSG_LOCK_MIGRATION_FAILED, DHT_MSG_LOCK_INODE_UNREF_FAILED,
@@ -79,6 +77,310 @@ GLFS_MSGID(
DHT_MSG_ENTRYLK_ERROR, DHT_MSG_INODELK_ERROR, DHT_MSG_LOC_FAILED,
DHT_MSG_UNKNOWN_FOP, DHT_MSG_MIGRATE_FILE_SKIPPED,
DHT_MSG_DIR_XATTR_HEAL_FAILED, DHT_MSG_HASHED_SUBVOL_DOWN,
- DHT_MSG_NON_HASHED_SUBVOL_DOWN);
+ DHT_MSG_NON_HASHED_SUBVOL_DOWN, DHT_MSG_SYNCTASK_CREATE_FAILED,
+ DHT_MSG_DIR_HEAL_ABORT, DHT_MSG_MIGRATE_SKIP, DHT_MSG_FD_CREATE_FAILED,
+ DHT_MSG_DICT_NEW_FAILED, DHT_MSG_FAILED_TO_OPEN, DHT_MSG_CREATE_FAILED,
+ DHT_MSG_FILE_NOT_EXIST, DHT_MSG_CHOWN_FAILED, DHT_MSG_FALLOCATE_FAILED,
+ DHT_MSG_FTRUNCATE_FAILED, DHT_MSG_STATFS_FAILED, DHT_MSG_WRITE_CROSS,
+ DHT_MSG_NEW_TARGET_FOUND, DHT_MSG_INSUFF_MEMORY, DHT_MSG_SET_XATTR_FAILED,
+ DHT_MSG_SET_MODE_FAILED, DHT_MSG_FILE_EXISTS_IN_DEST,
+ DHT_MSG_SYMLINK_FAILED, DHT_MSG_LINKFILE_DEL_FAILED, DHT_MSG_MKNOD_FAILED,
+ DHT_MSG_MIGRATE_CLEANUP_FAILED, DHT_MSG_LOCK_MIGRATE,
+ DHT_MSG_PARENT_BUILD_FAILED, DHT_MSG_HASHED_SUBVOL_NOT_FOUND,
+ DHT_MSG_ACQUIRE_ENTRYLK_FAILED, DHT_MSG_CREATE_DST_FAILED,
+ DHT_MSG_MIGRATION_EXIT, DHT_MSG_CHANGED_DST, DHT_MSG_TRACE_FAILED,
+ DHT_MSG_WRITE_LOCK_FAILED, DHT_MSG_GETACTIVELK_FAILED, DHT_MSG_STAT_FAILED,
+ DHT_MSG_UNLINK_PERFORM_FAILED, DHT_MSG_CLANUP_SOURCE_FILE_FAILED,
+ DHT_MSG_UNLOCK_FILE_FAILED, DHT_MSG_REMOVE_XATTR_FAILED,
+ DHT_MSG_DATA_MIGRATE_ABORT, DHT_MSG_DEFRAG_NULL, DHT_MSG_PARENT_NULL,
+ DHT_MSG_GFID_NOT_PRESENT, DHT_MSG_CHILD_LOC_FAILED,
+ DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, DHT_MSG_FIX_NOT_COMP,
+ DHT_MSG_SUBVOL_DETER_FAILED, DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID,
+ DHT_MSG_SIZE_FILE, DHT_MSG_GET_DATA_SIZE_FAILED,
+ DHT_MSG_PTHREAD_JOIN_FAILED, DHT_MSG_COUNTER_THREAD_CREATE_FAILED,
+ DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE,
+ DHT_MSG_ABORT_REBALANCE, DHT_MSG_CREATE_TASK_REBAL_FAILED,
+ DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, DHT_MSG_ADD_CHOICES_ERROR,
+ DHT_MSG_GET_CHOICES_ERROR, DHT_MSG_PREPARE_STATUS_ERROR,
+ DHT_MSG_SET_CHOICE_FAILED, DHT_MSG_SET_HASHED_SUBVOL_FAILED,
+ DHT_MSG_XATTR_HEAL_NOT_POSS, DHT_MSG_LINKTO_FILE_FAILED,
+ DHT_MSG_STALE_LINKFILE_DELETE, DHT_MSG_NO_SUBVOL_FOR_LINKTO,
+ DHT_MSG_SUBVOL_RETURNED, DHT_MSG_UNKNOWN_LOCAL_XSEL, DHT_MSG_GET_XATTR_ERR,
+ DHT_MSG_ALLOC_OR_FILL_FAILED, DHT_MSG_GET_REAL_NAME_FAILED,
+ DHT_MSG_COPY_UUID_FAILED, DHT_MSG_MDS_DETER_FAILED,
+ DHT_MSG_CREATE_REBAL_FAILED, DHT_MSG_LINK_LAYOUT_FAILED,
+ DHT_MSG_NO_SUBVOL_IN_LAYOUT, DHT_MSG_MEM_ALLOC_FAILED,
+ DHT_MSG_SET_IN_PARAMS_DICT_FAILED, DHT_MSG_LOC_COPY_FAILED,
+ DHT_MSG_PARENT_LOC_FAILED, DHT_MSG_CREATE_LOCK_FAILED,
+ DHT_MSG_PREV_ATTEMPT_FAILED, DHT_MSG_REFRESH_ATTEMPT,
+ DHT_MSG_ACQUIRE_LOCK_FAILED, DHT_MSG_CREATE_STUB_FAILED,
+ DHT_MSG_WIND_LOCK_REQ_FAILED, DHT_MSG_REFRESH_FAILED,
+ DHT_MSG_CACHED_SUBVOL_ERROR, DHT_MSG_NO_LINK_SUBVOL, DHT_MSG_SET_KEY_FAILED,
+ DHT_MSG_REMOVE_LINKTO_FAILED, DHT_MSG_LAYOUT_DICT_SET_FAILED,
+ DHT_MSG_XATTR_DICT_NULL, DHT_MSG_DUMMY_ALLOC_FAILED, DHT_MSG_DICT_IS_NULL,
+ DHT_MSG_LINK_INODE_FAILED, DHT_MSG_SELFHEAL_FAILED, DHT_MSG_NO_MDS_SUBVOL,
+ DHT_MSG_LIST_XATTRS_FAILED, DHT_MSG_RESET_INTER_XATTR_FAILED,
+ DHT_MSG_MDS_DOWN_UNABLE_TO_SET, DHT_MSG_WIND_UNLOCK_FAILED,
+ DHT_MSG_COMMIT_HASH_FAILED, DHT_MSG_UNLOCK_GFID_FAILED,
+ DHT_MSG_UNLOCK_FOLLOW_ENTRYLK, DHT_MSG_COPY_FRAME_FAILED,
+ DHT_MSG_UNLOCK_FOLLOW_LOCKS, DHT_MSG_ENTRYLK_FAILED_AFT_INODELK,
+ DHT_MSG_CALLOC_FAILED, DHT_MSG_LOCK_ALLOC_FAILED,
+ DHT_MSG_BLOCK_INODELK_FAILED,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ DHT_MSG_DST_NULL_SET_FAILED);
+
+#define DHT_MSG_FD_CTX_SET_FAILED_STR "Failed to set fd ctx"
+#define DHT_MSG_INVALID_VALUE_STR "Different dst found in the fd ctx"
+#define DHT_MSG_UNKNOWN_FOP_STR "Unknown FOP on file"
+#define DHT_MSG_OPEN_FD_ON_DST_FAILED_STR "Failed to open the fd on file"
+#define DHT_MSG_SYNCTASK_CREATE_FAILED_STR "Failed to create synctask"
+#define DHT_MSG_ASPRINTF_FAILED_STR \
+ "asprintf failed while fetching subvol from the id"
+#define DHT_MSG_HAS_MIGINFO_STR "Found miginfo in the inode ctx"
+#define DHT_MSG_FILE_LOOKUP_FAILED_STR "failed to lookup the file"
+#define DHT_MSG_INVALID_LINKFILE_STR \
+ "linkto target is different from cached-subvol. treating as destination " \
+ "subvol"
+#define DHT_MSG_GFID_MISMATCH_STR "gfid different on the target file"
+#define DHT_MSG_GET_XATTR_FAILED_STR "failed to get 'linkto' xattr"
+#define DHT_MSG_SET_INODE_CTX_FAILED_STR "failed to set inode-ctx target file"
+#define DHT_MSG_DIR_SELFHEAL_FAILED_STR "Healing of path failed"
+#define DHT_MSG_DIR_HEAL_ABORT_STR \
+ "Failed to get path from subvol. Aborting directory healing"
+#define DHT_MSG_DIR_XATTR_HEAL_FAILED_STR "xattr heal failed for directory"
+#define DHT_MSG_LOCK_INODE_UNREF_FAILED_STR \
+ "Found a NULL inode. Failed to unref the inode"
+#define DHT_MSG_DICT_SET_FAILED_STR "Failed to set dictionary value"
+#define DHT_MSG_NOT_LINK_FILE_ERROR_STR "got non-linkfile"
+#define DHT_MSG_CREATE_LINK_FAILED_STR "failed to initialize linkfile data"
+#define DHT_MSG_UNLINK_FAILED_STR "Unlinking linkfile on subvolume failed"
+#define DHT_MSG_MIGRATE_FILE_FAILED_STR "Migrate file failed"
+#define DHT_MSG_NO_MEMORY_STR "could not allocate memory for dict"
+#define DHT_MSG_SUBVOL_ERROR_STR "Failed to get linkto subvol"
+#define DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED_STR "link failed on subvol"
+#define DHT_MSG_MIGRATE_FILE_SKIPPED_STR "Migration skipped"
+#define DHT_MSG_FD_CREATE_FAILED_STR "fd create failed"
+#define DHT_MSG_DICT_NEW_FAILED_STR "dict_new failed"
+#define DHT_MSG_FAILED_TO_OPEN_STR "failed to open"
+#define DHT_MSG_CREATE_FAILED_STR "failed to create"
+#define DHT_MSG_FILE_NOT_EXIST_STR "file does not exist"
+#define DHT_MSG_CHOWN_FAILED_STR "chown failed"
+#define DHT_MSG_FALLOCATE_FAILED_STR "fallocate failed"
+#define DHT_MSG_FTRUNCATE_FAILED_STR "ftruncate failed"
+#define DHT_MSG_STATFS_FAILED_STR "failed to get statfs"
+#define DHT_MSG_WRITE_CROSS_STR \
+ "write will cross min-fre-disk for file on subvol. looking for new subvol"
+#define DHT_MSG_SUBVOL_INSUFF_SPACE_STR \
+ "Could not find any subvol with space accommodating the file. Cosider " \
+ "adding bricks"
+#define DHT_MSG_NEW_TARGET_FOUND_STR "New target found for file"
+#define DHT_MSG_INSUFF_MEMORY_STR "insufficient memory"
+#define DHT_MSG_SET_XATTR_FAILED_STR "failed to set xattr"
+#define DHT_MSG_SET_MODE_FAILED_STR "failed to set mode"
+#define DHT_MSG_FILE_EXISTS_IN_DEST_STR "file exists in destination"
+#define DHT_MSG_LINKFILE_DEL_FAILED_STR "failed to delete the linkfile"
+#define DHT_MSG_SYMLINK_FAILED_STR "symlink failed"
+#define DHT_MSG_MKNOD_FAILED_STR "mknod failed"
+#define DHT_MSG_SETATTR_FAILED_STR "failed to perform setattr"
+#define DHT_MSG_MIGRATE_CLEANUP_FAILED_STR \
+ "Migrate file cleanup failed: failed to fstat file"
+#define DHT_MSG_LOCK_MIGRATE_STR "locks will be migrated for file"
+#define DHT_MSG_PARENT_BUILD_FAILED_STR \
+ "failed to build parent loc, which is needed to acquire entrylk to " \
+ "synchronize with renames on this path. Skipping migration"
+#define DHT_MSG_HASHED_SUBVOL_NOT_FOUND_STR \
+ "cannot find hashed subvol which is needed to synchronize with renames " \
+ "on this path. Skipping migration"
+#define DHT_MSG_ACQUIRE_ENTRYLK_FAILED_STR "failed to acquire entrylk on subvol"
+#define DHT_MSG_CREATE_DST_FAILED_STR "create dst failed for file"
+#define DHT_MSG_MIGRATION_EXIT_STR "Exiting migration"
+#define DHT_MSG_CHANGED_DST_STR "destination changed fo file"
+#define DHT_MSG_TRACE_FAILED_STR "Trace failed"
+#define DHT_MSG_WRITE_LOCK_FAILED_STR "write lock failed"
+#define DHT_MSG_GETACTIVELK_FAILED_STR "getactivelk failed for file"
+#define DHT_MSG_STAT_FAILED_STR "failed to do a stat"
+#define DHT_MSG_UNLINK_PERFORM_FAILED_STR "failed to perform unlink"
+#define DHT_MSG_MIGRATE_FILE_COMPLETE_STR "completed migration"
+#define DHT_MSG_CLANUP_SOURCE_FILE_FAILED_STR "failed to cleanup source file"
+#define DHT_MSG_UNLOCK_FILE_FAILED_STR "failed to unlock file"
+#define DHT_MSG_REMOVE_XATTR_FAILED_STR "remove xattr failed"
+#define DHT_MSG_SOCKET_ERROR_STR "Failed to unlink listener socket"
+#define DHT_MSG_HASHED_SUBVOL_GET_FAILED_STR "Failed to get hashed subvolume"
+#define DHT_MSG_CACHED_SUBVOL_GET_FAILED_STR "Failed to get cached subvolume"
+#define DHT_MSG_MIGRATE_DATA_FAILED_STR "migrate-data failed"
+#define DHT_MSG_DEFRAG_NULL_STR "defrag is NULL"
+#define DHT_MSG_DATA_MIGRATE_ABORT_STR \
+ "Readdirp failed. Aborting data migration for dict"
+#define DHT_MSG_LAYOUT_FIX_FAILED_STR "fix layout failed"
+#define DHT_MSG_PARENT_NULL_STR "parent is NULL"
+#define DHT_MSG_GFID_NOT_PRESENT_STR "gfid not present"
+#define DHT_MSG_CHILD_LOC_FAILED_STR "Child loc build failed"
+#define DHT_MSG_SET_LOOKUP_FAILED_STR "Failed to set lookup"
+#define DHT_MSG_DIR_LOOKUP_FAILED_STR "lookup failed"
+#define DHT_MSG_DIR_REMOVED_STR "Dir renamed or removed. Skipping"
+#define DHT_MSG_READDIR_ERROR_STR "readdir failed, Aborting fix-layout"
+#define DHT_MSG_SETTLE_HASH_FAILED_STR "Settle hash failed"
+#define DHT_MSG_DEFRAG_PROCESS_DIR_FAILED_STR "gf_defrag_process_dir failed"
+#define DHT_MSG_FIX_NOT_COMP_STR \
+ "Unable to retrieve fixlayout xattr. Assume background fix layout not " \
+ "complete"
+#define DHT_MSG_SUBVOL_DETER_FAILED_STR \
+ "local subvolume determination failed with error"
+#define DHT_MSG_LOCAL_SUBVOL_STR "local subvol"
+#define DHT_MSG_NODE_UUID_STR "node uuid"
+#define DHT_MSG_SIZE_FILE_STR "Total size files"
+#define DHT_MSG_GET_DATA_SIZE_FAILED_STR \
+ "Failed to get the total data size. Unable to estimate time to complete " \
+ "rebalance"
+#define DHT_MSG_PTHREAD_JOIN_FAILED_STR \
+ "file_counter_thread: pthread_join failed"
+#define DHT_MSG_COUNTER_THREAD_CREATE_FAILED_STR \
+ "Failed to create the file counter thread"
+#define DHT_MSG_MIGRATION_INIT_QUEUE_FAILED_STR \
+ "Failed to initialise migration queue"
+#define DHT_MSG_REBALANCE_STOPPED_STR "Received stop command on rebalance"
+#define DHT_MSG_PAUSED_TIMEOUT_STR "Request pause timer timeout"
+#define DHT_MSG_WOKE_STR "woken"
+#define DHT_MSG_ABORT_REBALANCE_STR "Aborting rebalance"
+#define DHT_MSG_REBALANCE_START_FAILED_STR \
+ "Failed to start rebalance: look up on / failed"
+#define DHT_MSG_CREATE_TASK_REBAL_FAILED_STR \
+ "Could not create task for rebalance"
+#define DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL_STR \
+ "Rebalance estimates will not be available"
+#define DHT_MSG_REBALANCE_STATUS_STR "Rebalance status"
+#define DHT_MSG_DATA_NULL_STR "data value is NULL"
+#define DHT_MSG_ADD_CHOICES_ERROR_STR "Error to add choices in buffer"
+#define DHT_MSG_GET_CHOICES_ERROR_STR "Error to get choices"
+#define DHT_MSG_PREPARE_STATUS_ERROR_STR "Error to prepare status"
+#define DHT_MSG_SET_CHOICE_FAILED_STR "Failed to set full choice"
+#define DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED_STR \
+ "Failed to aggregate quota xattr"
+#define DHT_MSG_FILE_TYPE_MISMATCH_STR \
+ "path exists as a file on one subvolume and directory on another. Please " \
+ "fix it manually"
+#define DHT_MSG_LAYOUT_SET_FAILED_STR "failed to set layout for subvolume"
+#define DHT_MSG_LAYOUT_MERGE_FAILED_STR "failed to merge layouts for subvolume"
+#define DHT_MSG_SET_HASHED_SUBVOL_FAILED_STR "Failed to set hashed subvolume"
+#define DHT_MSG_XATTR_HEAL_NOT_POSS_STR \
+ "No gfid exists for path. so healing xattr is not possible"
+#define DHT_MSG_REVALIDATE_CBK_INFO_STR "Revalidate: subvolume returned -1"
+#define DHT_MSG_LAYOUT_MISMATCH_STR "Mismatching layouts"
+#define DHT_MSG_UNLINK_LOOKUP_INFO_STR "lookup_unlink retuened"
+#define DHT_MSG_LINKTO_FILE_FAILED_STR \
+ "Could not unlink the linkto file as either fd is open and/or linkto " \
+ "xattr is set"
+#define DHT_MSG_LAYOUT_PRESET_FAILED_STR \
+ "Could not set pre-set layout for subvolume"
+#define DHT_MSG_FILE_ON_MULT_SUBVOL_STR \
+ "multiple subvolumes have file (preferably rename the file in the " \
+ "backend, and do a fresh lookup"
+#define DHT_MSG_STALE_LINKFILE_DELETE_STR \
+ "attempting deletion of stale linkfile"
+#define DHT_MSG_LINK_FILE_LOOKUP_INFO_STR "Lookup on following linkfile"
+#define DHT_MSG_NO_SUBVOL_FOR_LINKTO_STR "No link subvolume for linkto"
+#define DHT_MSG_SUBVOL_RETURNED_STR "Subvolume returned -1"
+#define DHT_MSG_UNKNOWN_LOCAL_XSEL_STR "Unknown local->xsel"
+#define DHT_MSG_DICT_GET_FAILED_STR "Failed to get"
+#define DHT_MSG_UUID_PARSE_ERROR_STR "Failed to parse uuid"
+#define DHT_MSG_GET_XATTR_ERR_STR "getxattr err for dir"
+#define DHT_MSG_ALLOC_OR_FILL_FAILED_STR "alloc or fill failed"
+#define DHT_MSG_UPGRADE_BRICKS_STR \
+ "At least one of the bricks does not support this operation. Please " \
+ "upgrade all bricks"
+#define DHT_MSG_GET_REAL_NAME_FAILED_STR "Failed to get real filename"
+#define DHT_MSG_LAYOUT_NULL_STR "Layout is NULL"
+#define DHT_MSG_COPY_UUID_FAILED_STR "Failed to copy node uuid key"
+#define DHT_MSG_MDS_DETER_FAILED_STR \
+ "Cannot determine MDS, fetching xattr randomly from a subvol"
+#define DHT_MSG_HASHED_SUBVOL_DOWN_STR \
+ "MDS is down for path, so fetching xattr randomly from subvol"
+#define DHT_MSG_CREATE_REBAL_FAILED_STR \
+ "failed to create a new rebalance synctask"
+#define DHT_MSG_FIX_LAYOUT_INFO_STR "fixing the layout"
+#define DHT_MSG_OPERATION_NOT_SUP_STR "wrong directory-spread-count value"
+#define DHT_MSG_LINK_LAYOUT_FAILED_STR "failed to link the layout in inode"
+#define DHT_MSG_NO_SUBVOL_IN_LAYOUT_STR "no subvolume in layout for path"
+#define DHT_MSG_INODE_LK_ERROR_STR "mknod lock failed for file"
+#define DHT_MSG_MEM_ALLOC_FAILED_STR "mem allocation failed"
+#define DHT_MSG_PARENT_LAYOUT_CHANGED_STR \
+ "extracting in-memory layout of parent failed"
+#define DHT_MSG_SET_IN_PARAMS_DICT_FAILED_STR \
+ "setting in params dictionary failed"
+#define DHT_MSG_LOC_COPY_FAILED_STR "loc_copy failed"
+#define DHT_MSG_LOC_FAILED_STR "parent loc build failed"
+#define DHT_MSG_PARENT_LOC_FAILED_STR "locking parent failed"
+#define DHT_MSG_CREATE_LOCK_FAILED_STR "Create lock failed"
+#define DHT_MSG_PREV_ATTEMPT_FAILED_STR \
+ "mkdir loop detected. parent layout didn't change even though previous " \
+ "attempt of mkdir failed because of in-memory layout not matching with " \
+ "that on disk."
+#define DHT_MSG_REFRESH_ATTEMPT_STR \
+ "mkdir parent layout changed. Attempting a refresh and then a retry"
+#define DHT_MSG_ACQUIRE_LOCK_FAILED_STR \
+ "Acquiring lock on parent to guard against layout-change failed"
+#define DHT_MSG_CREATE_STUB_FAILED_STR "creating stub failed"
+#define DHT_MSG_WIND_LOCK_REQ_FAILED_STR \
+ "cannot wind lock request to guard parent layout"
+#define DHT_MSG_REFRESH_FAILED_STR "refreshing parent layout failed."
+#define DHT_MSG_CACHED_SUBVOL_ERROR_STR "On cached subvol"
+#define DHT_MSG_NO_LINK_SUBVOL_STR "Linkfile does not have link subvolume"
+#define DHT_MSG_SET_KEY_FAILED_STR "failed to set key"
+#define DHT_MSG_CHILD_DOWN_STR "Received CHILD_DOWN. Exiting"
+#define DHT_MSG_LOG_FIXED_LAYOUT_STR "log layout fixed"
+#define DHT_MSG_REBAL_STRUCT_SET_STR "local->rebalance already set"
+#define DHT_MSG_REMOVE_LINKTO_FAILED_STR "Removal of linkto failed at subvol"
+#define DHT_MSG_LAYOUT_DICT_SET_FAILED_STR "dht layout dict set failed"
+#define DHT_MSG_SUBVOL_INFO_STR "creating subvolume"
+#define DHT_MSG_COMPUTE_HASH_FAILED_STR "hash computation failed"
+#define DHT_MSG_INVALID_DISK_LAYOUT_STR \
+ "Invalid disk layout: Catastrophic error layout with unknown type found"
+#define DHT_MSG_LAYOUT_SORT_FAILED_STR "layout sort failed"
+#define DHT_MSG_ANOMALIES_INFO_STR "Found anomalies"
+#define DHT_MSG_XATTR_DICT_NULL_STR "xattr dictionary is NULL"
+#define DHT_MSG_DISK_LAYOUT_MISSING_STR "Disk layout missing"
+#define DHT_MSG_LAYOUT_INFO_STR "layout info"
+#define DHT_MSG_SUBVOL_NO_LAYOUT_INFO_STR "no pre-set layout for subvol"
+#define DHT_MSG_SELFHEAL_XATTR_FAILED_STR "layout setxattr failed"
+#define DHT_MSG_DIR_SELFHEAL_XATTR_FAILED_STR "Directory self heal xattr failed"
+#define DHT_MSG_DUMMY_ALLOC_FAILED_STR "failed to allocate dummy layout"
+#define DHT_MSG_DICT_IS_NULL_STR \
+ "dict is NULL, need to make sure gfids are same"
+#define DHT_MSG_ENTRYLK_ERROR_STR "acquiring entrylk after inodelk failed"
+#define DHT_MSG_NO_DISK_USAGE_STATUS_STR "no du stats"
+#define DHT_MSG_LINK_INODE_FAILED_STR "linking inode failed"
+#define DHT_MSG_SELFHEAL_FAILED_STR "Directory selfheal failed"
+#define DHT_MSG_NO_MDS_SUBVOL_STR "No mds subvol"
+#define DHT_MSG_LIST_XATTRS_FAILED_STR "failed to list xattrs"
+#define DHT_MSG_RESET_INTER_XATTR_FAILED_STR "Failed to reset internal xattr"
+#define DHT_MSG_MDS_DOWN_UNABLE_TO_SET_STR \
+ "mds subvol is down, unable to set xattr"
+#define DHT_MSG_DIR_ATTR_HEAL_FAILED_STR \
+ "Directory attr heal failed. Failed to set uid/gid"
+#define DHT_MSG_WIND_UNLOCK_FAILED_STR \
+ "Winding unlock failed: stale locks left on brick"
+#define DHT_MSG_COMMIT_HASH_FAILED_STR "Directory commit hash update failed"
+#define DHT_MSG_LK_ARRAY_INFO_STR "lk info"
+#define DHT_MSG_UNLOCK_GFID_FAILED_STR \
+ "unlock failed on gfid: stale lock might be left"
+#define DHT_MSG_UNLOCKING_FAILED_STR "unlocking failed"
+#define DHT_MSG_UNLOCK_FOLLOW_ENTRYLK_STR "not unlocking following entrylks"
+#define DHT_MSG_COPY_FRAME_FAILED_STR "copy frame failed"
+#define DHT_MSG_UNLOCK_FOLLOW_LOCKS_STR "not unlocking following locks"
+#define DHT_MSG_INODELK_FAILED_STR "inodelk failed on subvol"
+#define DHT_MSG_LOCK_FRAME_FAILED_STR "memory allocation failed for lock_frame"
+#define DHT_MSG_LOCAL_LOCK_INIT_FAILED_STR "dht_local_lock_init failed"
+#define DHT_MSG_ENTRYLK_FAILED_AFT_INODELK_STR \
+ "dht_blocking_entrylk failed after taking inodelk"
+#define DHT_MSG_BLOCK_INODELK_FAILED_STR "dht_blocking_inodelk failed"
+#define DHT_MSG_CALLOC_FAILED_STR "calloc failed"
+#define DHT_MSG_LOCK_ALLOC_FAILED_STR "lock allocation failed"
+#define DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS_STR \
+ "cannot allocate a frame, not unlocking following entrylks"
+#define DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK_STR \
+ "storing locks in local failed, not unlocking following entrylks"
+#define DHT_MSG_DST_NULL_SET_FAILED_STR \
+ "src or dst is NULL, Failed to set dictionary value"
#endif /* _DHT_MESSAGES_H_ */
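
Each DHT_MSG_* identifier above is paired with a *_STR macro carrying its
default text for the structured logger; call sites pass only the message ID
plus "key=value" fields. A minimal usage sketch, mirroring gf_smsg() calls
that appear later in this patch:

    /* The *_STR text is resolved from the message ID by the logger. */
    gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY,
            "for dir_dfmeta", NULL);
    gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
            "path=%s", loc->path, "key=%s", conf->xattr_name, NULL);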
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index e0f25b1d080..8ba8082bd86 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -9,16 +9,15 @@
*/
#include "dht-common.h"
-#include <glusterfs/xlator.h>
#include <glusterfs/syscall.h>
-#include <signal.h>
#include <fnmatch.h>
#include <signal.h>
#include <glusterfs/events.h>
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
#define GF_DISK_SECTOR_SIZE 512
-#define DHT_REBALANCE_PID 4242 /* Change it if required */
-#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */
+#define DHT_REBALANCE_PID 4242 /* Change it if required */
+#define DHT_REBALANCE_BLKSIZE 1048576 /* 1 MB */
#define MAX_MIGRATE_QUEUE_COUNT 500
#define MIN_MIGRATE_QUEUE_COUNT 200
#define MAX_REBAL_TYPE_SIZE 16
@@ -46,7 +45,10 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt)
if (meta) {
for (i = 0; i < local_subvols_cnt; i++) {
- gf_dirent_free(&meta->equeue[i]);
+ if (meta->equeue)
+ gf_dirent_free(&meta->equeue[i]);
+ if (meta->lfd && meta->lfd[i])
+ fd_unref(meta->lfd[i]);
}
GF_FREE(meta->equeue);
@@ -54,6 +56,7 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt)
GF_FREE(meta->iterator);
GF_FREE(meta->offset_var);
GF_FREE(meta->fetch_entries);
+ GF_FREE(meta->lfd);
GF_FREE(meta);
}
}
@@ -85,26 +88,6 @@ dht_set_global_defrag_error(gf_defrag_info_t *defrag, int ret)
return;
}
-static gf_boolean_t
-dht_is_tier_command(int cmd)
-{
- gf_boolean_t is_tier = _gf_false;
-
- switch (cmd) {
- case GF_DEFRAG_CMD_START_TIER:
- case GF_DEFRAG_CMD_STATUS_TIER:
- case GF_DEFRAG_CMD_START_DETACH_TIER:
- case GF_DEFRAG_CMD_STOP_DETACH_TIER:
- case GF_DEFRAG_CMD_PAUSE_TIER:
- case GF_DEFRAG_CMD_RESUME_TIER:
- is_tier = _gf_true;
- break;
- default:
- break;
- }
- return is_tier;
-}
-
static int
dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
{
@@ -113,8 +96,6 @@ dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
char *tmpstr = NULL;
char *ptr = NULL;
char *suffix = "-dht";
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
int len = 0;
eventtypes_t event = EVENT_LAST;
@@ -133,21 +114,14 @@ dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
break;
}
- if (dht_is_tier_command(cmd)) {
- /* We should have the tier volume name*/
- conf = this->private;
- defrag = conf->defrag;
- volname = defrag->tier_conf.volname;
- } else {
- /* DHT volume */
- len = strlen(this->name) - strlen(suffix);
- tmpstr = gf_strdup(this->name);
- if (tmpstr) {
- ptr = tmpstr + len;
- if (!strcmp(ptr, suffix)) {
- tmpstr[len] = '\0';
- volname = tmpstr;
- }
+ /* DHT volume */
+ len = strlen(this->name) - strlen(suffix);
+ tmpstr = gf_strdup(this->name);
+ if (tmpstr) {
+ ptr = tmpstr + len;
+ if (!strcmp(ptr, suffix)) {
+ tmpstr[len] = '\0';
+ volname = tmpstr;
}
}
@@ -173,75 +147,6 @@ dht_strip_out_acls(dict_t *dict)
}
}
-static int
-dht_write_with_holes(xlator_t *to, fd_t *fd, struct iovec *vec, int count,
- int32_t size, off_t offset, struct iobref *iobref,
- int *fop_errno)
-{
- int i = 0;
- int ret = -1;
- int start_idx = 0;
- int tmp_offset = 0;
- int write_needed = 0;
- int buf_len = 0;
- int size_pending = 0;
- char *buf = NULL;
-
- /* loop through each vector */
- for (i = 0; i < count; i++) {
- buf = vec[i].iov_base;
- buf_len = vec[i].iov_len;
-
- for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len;
- start_idx += GF_DISK_SECTOR_SIZE) {
- if (mem_0filled(buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) {
- write_needed = 1;
- continue;
- }
-
- if (write_needed) {
- ret = syncop_write(
- to, fd, (buf + tmp_offset), (start_idx - tmp_offset),
- (offset + tmp_offset), iobref, 0, NULL, NULL);
- /* 'path' will be logged in calling function */
- if (ret < 0) {
- gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)",
- strerror(-ret));
- *fop_errno = -ret;
- ret = -1;
- goto out;
- }
-
- write_needed = 0;
- }
- tmp_offset = start_idx + GF_DISK_SECTOR_SIZE;
- }
-
- if ((start_idx < buf_len) || write_needed) {
- /* This means, last chunk is not yet written.. write it */
- ret = syncop_write(to, fd, (buf + tmp_offset),
- (buf_len - tmp_offset), (offset + tmp_offset),
- iobref, 0, NULL, NULL);
- if (ret < 0) {
- /* 'path' will be logged in calling function */
- gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)",
- strerror(-ret));
- *fop_errno = -ret;
- ret = -1;
- goto out;
- }
- }
-
- size_pending = (size - buf_len);
- if (!size_pending)
- break;
- }
-
- ret = size;
-out:
- return ret;
-}
-
/*
return values:
-1 : failure
@@ -649,7 +554,7 @@ out:
static int
__dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
loc_t *loc, struct iatt *stbuf, fd_t **dst_fd,
- int *fop_errno)
+ int *fop_errno, int file_has_holes)
{
int ret = -1;
int ret2 = -1;
@@ -704,26 +609,23 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
goto out;
}
- if (!!dht_is_tier_xlator(this)) {
- xdata = dict_new();
- if (!xdata) {
- *fop_errno = ENOMEM;
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
- DHT_MSG_MIGRATE_FILE_FAILED, "%s: dict_new failed)",
- loc->path);
- goto out;
- }
+ xdata = dict_new();
+ if (!xdata) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: dict_new failed)", loc->path);
+ goto out;
+ }
- ret = dict_set_int32(xdata, GF_CLEAN_WRITE_PROTECTION, 1);
- if (ret) {
- *fop_errno = ENOMEM;
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: failed to set dictionary value: key = %s ", loc->path,
- GF_CLEAN_WRITE_PROTECTION);
- goto out;
- }
+ ret = dict_set_int32_sizen(xdata, GF_CLEAN_WRITE_PROTECTION, 1);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: failed to set dictionary value: key = %s ", loc->path,
+ GF_CLEAN_WRITE_PROTECTION);
+ goto out;
}
ret = syncop_lookup(to, loc, &new_stbuf, NULL, xdata, NULL);
@@ -818,7 +720,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
/* No need to bother about 0 byte size files */
if (stbuf->ia_size > 0) {
- if (conf->use_fallocate) {
+ if (conf->use_fallocate && !file_has_holes) {
ret = syncop_fallocate(to, fd, 0, 0, stbuf->ia_size, NULL, NULL);
if (ret < 0) {
if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -ENOSYS) {
@@ -845,9 +747,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
goto out;
}
}
- }
-
- if (!conf->use_fallocate) {
+ } else {
ret = syncop_ftruncate(to, fd, stbuf->ia_size, NULL, NULL, NULL,
NULL);
if (ret < 0) {
@@ -1098,32 +998,103 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
int ret = 0;
int count = 0;
off_t offset = 0;
+ off_t data_offset = 0;
+ off_t hole_offset = 0;
struct iovec *vector = NULL;
struct iobref *iobref = NULL;
uint64_t total = 0;
size_t read_size = 0;
+ size_t data_block_size = 0;
dict_t *xdata = NULL;
dht_conf_t *conf = NULL;
conf = this->private;
+
/* if file size is '0', no need to enter this loop */
while (total < ia_size) {
- read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE)
- ? DHT_REBALANCE_BLKSIZE
- : (ia_size - total));
+ /* This is a regular file - read it sequentially */
+ if (!hole_exists) {
+ read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE)
+ ? DHT_REBALANCE_BLKSIZE
+ : (ia_size - total));
+ } else {
+ /* This is a sparse file - read only the data segments in the file
+ */
+
+            /* If the previous data block is fully copied, find the next data
+             * segment starting at the offset of the last read and written
+             * byte. */
+ if (data_block_size <= 0) {
+ ret = syncop_seek(from, src, offset, GF_SEEK_DATA, NULL,
+ &data_offset);
+ if (ret) {
+ if (ret == -ENXIO)
+ ret = 0; /* No more data segments */
+ else
+ *fop_errno = -ret; /* Error occurred */
+
+ break;
+ }
+
+ /* If the position of the current data segment is greater than
+ * the position of the next hole, find the next hole in order to
+ * calculate the length of the new data segment */
+ if (data_offset > hole_offset) {
+ /* Starting at the offset of the last data segment, find the
+ * next hole */
+ ret = syncop_seek(from, src, data_offset, GF_SEEK_HOLE,
+ NULL, &hole_offset);
+ if (ret) {
+ /* If an error occurred here it's a real error because
+ * if the seek for a data segment was successful then
+ * necessarily another hole must exist (EOF is a hole)
+ */
+ *fop_errno = -ret;
+ break;
+ }
+
+ /* Calculate the total size of the current data block */
+ data_block_size = hole_offset - data_offset;
+ }
+ } else {
+ /* There is still data in the current segment, move the
+ * data_offset to the position of the last written byte */
+ data_offset = offset;
+ }
+
+            /* Calculate how much data needs to be read and written. If the
+             * data segment is larger than DHT_REBALANCE_BLKSIZE, read and
+             * write DHT_REBALANCE_BLKSIZE bytes now and the rest in the
+             * next iteration(s). */
+ read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE)
+ ? DHT_REBALANCE_BLKSIZE
+ : data_block_size);
+
+ /* Calculate the remaining size of the data block - maybe there's no
+ * need to seek for data in the next iteration */
+ data_block_size -= read_size;
+
+ /* Set offset to the offset of the data segment so read and write
+ * will have the correct position */
+ offset = data_offset;
+ }
ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count,
&iobref, NULL, NULL, NULL);
+
if (!ret || (ret < 0)) {
- *fop_errno = -ret;
+ if (!ret) {
+            /* File was probably truncated */
+ ret = -1;
+ *fop_errno = ENOSPC;
+ } else {
+ *fop_errno = -ret;
+ }
break;
}
- if (hole_exists) {
- ret = dht_write_with_holes(to, dst, vector, count, ret, offset,
- iobref, fop_errno);
- } else {
- if (!conf->force_migration && !dht_is_tier_xlator(this)) {
+ if (!conf->force_migration) {
+ if (!xdata) {
xdata = dict_new();
if (!xdata) {
gf_msg("dht", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
@@ -1143,7 +1114,7 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
* https://github.com/gluster/glusterfs/issues/308
* for more details.
*/
- ret = dict_set_int32(xdata, GF_AVOID_OVERWRITE, 1);
+ ret = dict_set_int32_sizen(xdata, GF_AVOID_OVERWRITE, 1);
if (ret) {
gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM,
"failed to set dict");
@@ -1152,22 +1123,12 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
break;
}
}
-
- ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL,
- NULL, xdata, NULL);
- if (ret < 0) {
- *fop_errno = -ret;
- }
- }
-
- if ((defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) &&
- (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)) {
- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED,
- "Migrate file paused");
- ret = -1;
}
+ ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL,
+ NULL, xdata, NULL);
if (ret < 0) {
+ *fop_errno = -ret;
break;
}
@@ -1561,6 +1522,7 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
xlator_t *old_target = NULL;
xlator_t *hashed_subvol = NULL;
fd_t *linkto_fd = NULL;
+ dict_t *xdata = NULL;
if (from == to) {
gf_msg_debug(this->name, 0,
@@ -1571,20 +1533,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
- /* If defrag is NULL, it should be assumed that migration is triggered
- * from client */
- defrag = conf->defrag;
-
- /* migration of files from clients is restricted to non-tiered clients
- * for now */
- if (!defrag && dht_is_tier_xlator(this)) {
- ret = ENOTSUP;
- goto out;
- }
-
- if (defrag && defrag->tier_conf.is_tier)
- log_level = GF_LOG_TRACE;
-
gf_log(this->name, log_level, "%s: attempting to move from %s to %s",
loc->path, from->name, to->name);
@@ -1627,6 +1575,10 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
loc->path);
}
+ /* The file is locked to prevent a rename during a migration. Renames
+ * and migrations on the file at the same time can lead to data loss.
+ */
+
ret = dht_build_parent_loc(this, &parent_loc, loc, fop_errno);
if (ret < 0) {
ret = -1;
@@ -1727,9 +1679,13 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+ /* Try to preserve 'holes' while migrating data */
+ if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
+ file_has_holes = 1;
+
/* create the destination, with required modes/xattr */
ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, &dst_fd,
- fop_errno);
+ fop_errno, file_has_holes);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0,
"Create dst failed"
@@ -1773,8 +1729,8 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
* destination. We need to do update this only post migration
* as in case of failure the linkto needs to point to the source
* subvol */
- ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf,
- &dst_fd, fop_errno);
+ ret = __dht_rebalance_create_dst_file(
+ this, to, from, loc, &stbuf, &dst_fd, fop_errno, file_has_holes);
if (ret) {
gf_log(this->name, GF_LOG_ERROR,
"Create dst failed"
@@ -1861,9 +1817,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
ret = 0;
goto out;
}
- /* Try to preserve 'holes' while migrating data */
- if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
- file_has_holes = 1;
ret = __dht_rebalance_migrate_data(this, defrag, from, to, src_fd, dst_fd,
stbuf.ia_size, file_has_holes,
@@ -1878,7 +1831,15 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
/* TODO: Sync the locks */
- ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, NULL, NULL);
+ xdata = dict_new();
+ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s: failed to set last-fsync flag on "
+ "%s (%s)",
+ loc->path, to->name, strerror(ENOMEM));
+ }
+
+ ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, xdata, NULL);
if (ret) {
gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)",
loc->path, to->name, strerror(-ret));
@@ -2321,14 +2282,12 @@ out:
}
}
- if (!dht_is_tier_xlator(this)) {
- lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES,
- NULL, NULL);
- if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) {
- gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0,
- "%s: removexattr failed key %s", loc->path,
- GF_PROTECT_FROM_EXTERNAL_WRITES);
- }
+ lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, NULL,
+ NULL);
+ if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0,
+ "%s: removexattr failed key %s", loc->path,
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
}
if (dict)
@@ -2341,11 +2300,15 @@ out:
if (dst_fd)
syncop_close(dst_fd);
+
if (src_fd)
syncop_close(src_fd);
if (linkto_fd)
syncop_close(linkto_fd);
+ if (xdata)
+ dict_unref(xdata);
+
loc_wipe(&tmp_loc);
loc_wipe(&parent_loc);
@@ -2440,15 +2403,12 @@ void
dht_build_root_inode(xlator_t *this, inode_t **inode)
{
inode_table_t *itable = NULL;
- uuid_t root_gfid = {
- 0,
- };
+ static uuid_t root_gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
itable = inode_table_new(0, this);
if (!itable)
return;
- root_gfid[15] = 1;
*inode = inode_find(itable, root_gfid);
}
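
The static initializer is the canonical root GFID,
00000000-0000-0000-0000-000000000001, precomputed instead of the removed
runtime pattern:

    uuid_t root_gfid = {0};
    root_gfid[15] = 1; /* equivalent to the static {0, ..., 0, 1} above */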
@@ -2578,10 +2538,10 @@ out:
* all hardlinks.
*/
-int
+gf_boolean_t
gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
{
- int ret = 0;
+ gf_boolean_t ret = _gf_false;
int i = local_subvol_index;
char *str = NULL;
uint32_t hashval = 0;
@@ -2603,12 +2563,11 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
}
str = uuid_utoa_r(gfid, buf);
- ret = dht_hash_compute(this, 0, str, &hashval);
- if (ret == 0) {
+ if (dht_hash_compute(this, 0, str, &hashval) == 0) {
index = (hashval % entry->count);
if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
/* Index matches this node's nodeuuid.*/
- ret = 1;
+ ret = _gf_true;
goto out;
}
@@ -2621,12 +2580,12 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
/* None of the bricks in the subvol are up.
* CHILD_DOWN will kill the process soon */
- return 0;
+ return _gf_false;
}
if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
/* Index matches this node's nodeuuid.*/
- ret = 1;
+ ret = _gf_true;
goto out;
}
}
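
The return-type change above does not alter the ownership rule: the gfid is
rendered as a string, hashed, and reduced modulo the number of node UUIDs
known for the replica set; only the node whose UUID occupies that slot
migrates the file, with a fallback to the next brick that is up. A condensed
sketch of the decision (is_up() and next_up_index() are hypothetical
stand-ins for the fallback handling above):

    /* Sketch: does this node own the migration of the given gfid? */
    char buf[64];
    uint32_t hashval = 0;

    if (dht_hash_compute(this, 0, uuid_utoa_r(gfid, buf), &hashval) != 0)
        return _gf_false; /* cannot hash: do not claim the file */

    int index = hashval % entry->count;
    if (!is_up(&entry->elements[index]))
        index = next_up_index(entry, index);

    return entry->elements[index].info == REBAL_NODEUUID_MINE;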
@@ -2675,6 +2634,7 @@ gf_defrag_migrate_single_file(void *opaque)
struct iatt *iatt_ptr = NULL;
gf_boolean_t update_skippedcount = _gf_true;
int i = 0;
+    gf_boolean_t should_i_migrate = _gf_false;
rebal_entry = (struct dht_container *)opaque;
if (!rebal_entry) {
@@ -2729,17 +2689,29 @@ gf_defrag_migrate_single_file(void *opaque)
goto out;
}
- if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index,
- entry->d_stat.ia_gfid)) {
- gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
- goto out;
- }
+ should_i_migrate = gf_defrag_should_i_migrate(
+ this, rebal_entry->local_subvol_index, entry->d_stat.ia_gfid);
gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
gf_uuid_copy(entry_loc.pargfid, loc->gfid);
ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
+
+ if (!should_i_migrate) {
+        /* This node isn't supposed to migrate this file. Suppress any
+         * potential lookup error, as the file is being migrated by
+         * another node. */
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "Ignoring lookup failure: node isn't migrating %s",
+ entry_loc.path);
+ ret = 0;
+ }
+ gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
+ goto out;
+ }
+
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
"Migrate file failed: %s lookup failed", entry_loc.path);
@@ -2902,8 +2874,7 @@ gf_defrag_migrate_single_file(void *opaque)
if (defrag->stats == _gf_true) {
gettimeofday(&end, NULL);
- elapsed = (end.tv_sec - start.tv_sec) * 1e6 +
- (end.tv_usec - start.tv_usec);
+ elapsed = gf_tvdiff(&start, &end);
gf_log(this->name, GF_LOG_INFO,
"Migration of "
"file:%s size:%" PRIu64
@@ -3082,9 +3053,9 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
dht_conf_t *conf, gf_defrag_info_t *defrag,
fd_t *fd, dict_t *migrate_data,
struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req,
- int *should_commit_hash, int *perrno)
+ int *perrno)
{
- int ret = -1;
+ int ret = 0;
char is_linkfile = 0;
gf_dirent_t *df_entry = NULL;
struct dht_container *tmp_container = NULL;
@@ -3100,6 +3071,13 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
}
if (dir_dfmeta->fetch_entries[i] == 1) {
+ if (!fd) {
+ dir_dfmeta->fetch_entries[i] = 0;
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+ ret = 0;
+ goto out;
+ }
+
ret = syncop_readdirp(conf->local_subvols[i], fd, 131072,
dir_dfmeta->offset_var[i].offset,
&(dir_dfmeta->equeue[i]), xattr_req, NULL);
@@ -3259,7 +3237,6 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *migrate_data, int *perrno)
{
int ret = -1;
- fd_t *fd = NULL;
dht_conf_t *conf = NULL;
gf_dirent_t entries;
dict_t *xattr_req = NULL;
@@ -3280,7 +3257,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
int dfc_index = 0;
int throttle_up = 0;
struct dir_dfmeta *dir_dfmeta = NULL;
- int should_commit_hash = 1;
+ xlator_t *old_THIS = NULL;
gf_log(this->name, GF_LOG_INFO, "migrate data called on %s", loc->path);
gettimeofday(&dir_start, NULL);
@@ -3293,28 +3270,53 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
goto out;
}
- fd = fd_create(loc->inode, defrag->pid);
- if (!fd) {
- gf_log(this->name, GF_LOG_ERROR, "Failed to create fd");
+ old_THIS = THIS;
+ THIS = this;
+
+ dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer);
+ if (!dir_dfmeta) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL");
ret = -1;
goto out;
}
- ret = syncop_opendir(this, loc, fd, NULL, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_DATA_FAILED,
- "Migrate data failed: Failed to open dir %s", loc->path);
- *perrno = -ret;
+ dir_dfmeta->lfd = GF_CALLOC(local_subvols_cnt, sizeof(fd_t *),
+ gf_common_mt_pointer);
+ if (!dir_dfmeta->lfd) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY,
+ "for dir_dfmeta", NULL);
ret = -1;
+ *perrno = ENOMEM;
goto out;
}
- fd_bind(fd);
- dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer);
- if (!dir_dfmeta) {
- gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL");
- ret = -1;
- goto out;
+ for (i = 0; i < local_subvols_cnt; i++) {
+ dir_dfmeta->lfd[i] = fd_create(loc->inode, defrag->pid);
+ if (!dir_dfmeta->lfd[i]) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_FD_CREATE_FAILED,
+ NULL);
+ *perrno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir(conf->local_subvols[i], loc, dir_dfmeta->lfd[i],
+ NULL, NULL);
+ if (ret) {
+ fd_unref(dir_dfmeta->lfd[i]);
+ dir_dfmeta->lfd[i] = NULL;
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FAILED_TO_OPEN,
+ "dir: %s", loc->path, "subvol: %s",
+ conf->local_subvols[i]->name, NULL);
+
+ if (conf->decommission_in_progress) {
+ *perrno = -ret;
+ ret = -1;
+ goto out;
+ }
+ } else {
+ fd_bind(dir_dfmeta->lfd[i]);
+ }
}
dir_dfmeta->head = GF_CALLOC(local_subvols_cnt, sizeof(*(dir_dfmeta->head)),
@@ -3349,6 +3351,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = -1;
goto out;
}
+
ret = gf_defrag_ctx_subvols_init(dir_dfmeta->offset_var, this);
if (ret) {
gf_log(this->name, GF_LOG_ERROR,
@@ -3361,7 +3364,8 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dir_dfmeta->fetch_entries = GF_CALLOC(local_subvols_cnt, sizeof(int),
gf_common_mt_int);
if (!dir_dfmeta->fetch_entries) {
- gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->fetch_entries is NULL");
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY,
+ "for dir_dfmeta->fetch_entries", NULL);
ret = -1;
goto out;
}
@@ -3431,8 +3435,13 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ldfq_count <= MAX_MIGRATE_QUEUE_COUNT &&
!dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) {
ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf,
- defrag, fd, migrate_data, dir_dfmeta,
- xattr_req, &should_commit_hash, perrno);
+ defrag, dir_dfmeta->lfd[dfc_index],
+ migrate_data, dir_dfmeta, xattr_req,
+ perrno);
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
+ goto out;
+ }
if (ret) {
gf_log(this->name, GF_LOG_WARNING,
@@ -3472,27 +3481,19 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
}
gettimeofday(&end, NULL);
- elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 +
- (end.tv_usec - dir_start.tv_usec);
+ elapsed = gf_tvdiff(&dir_start, &end);
gf_log(this->name, GF_LOG_INFO,
"Migration operation on dir %s took "
"%.2f secs",
loc->path, elapsed / 1e6);
ret = 0;
out:
-
+ THIS = old_THIS;
gf_defrag_free_dir_dfmeta(dir_dfmeta, local_subvols_cnt);
if (xattr_req)
dict_unref(xattr_req);
- if (fd)
- fd_unref(fd);
-
- if (ret == 0 && should_commit_hash == 0) {
- ret = 2;
- }
-
/* It does not matter if it errored out - this number is
* used to calculate rebalance estimated time to complete.
* No locking required as dirs are processed by a single thread.
@@ -3500,6 +3501,7 @@ out:
defrag->num_dirs_processed++;
return ret;
}
+
int
gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *fix_layout)
@@ -3514,7 +3516,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
* rebalance is complete.
*/
if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX ||
- defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
return 0;
}
@@ -3560,114 +3561,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
return 0;
}
-/* Function for doing a named lookup on file inodes during an attach tier
- * So that a hardlink lookup heal i.e gfid to parent gfid lookup heal
- * happens on pre-existing data. This is required so that the ctr database has
- * hardlinks of all the exisitng file in the volume. CTR xlator on the
- * brick/server side does db update/insert of the hardlink on a namelookup.
- * Currently the namedlookup is done synchronous to the fixlayout that is
- * triggered by attach tier. This is not performant, adding more time to
- * fixlayout. The performant approach is record the hardlinks on a compressed
- * datastore and then do the namelookup asynchronously later, giving the ctr db
- * eventual consistency
- * */
-int
-gf_fix_layout_tier_attach_lookup(xlator_t *this, loc_t *parent_loc,
- gf_dirent_t *file_dentry)
-{
- int ret = -1;
- dict_t *lookup_xdata = NULL;
- dht_conf_t *conf = NULL;
- loc_t file_loc = {
- 0,
- };
- struct iatt iatt = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
-
- GF_VALIDATE_OR_GOTO(this->name, parent_loc, out);
-
- GF_VALIDATE_OR_GOTO(this->name, file_dentry, out);
-
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- if (!parent_loc->inode) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s parent is NULL", parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- conf = this->private;
-
- loc_wipe(&file_loc);
-
- if (gf_uuid_is_null(file_dentry->d_stat.ia_gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s gfid not present", parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- gf_uuid_copy(file_loc.gfid, file_dentry->d_stat.ia_gfid);
-
- if (gf_uuid_is_null(parent_loc->gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s"
- " gfid not present",
- parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- gf_uuid_copy(file_loc.pargfid, parent_loc->gfid);
-
- ret = dht_build_child_loc(this, &file_loc, parent_loc, file_dentry->d_name);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Child loc build failed");
- ret = -1;
- goto out;
- }
-
- lookup_xdata = dict_new();
- if (!lookup_xdata) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed creating lookup dict for %s", file_dentry->d_name);
- goto out;
- }
-
- ret = dict_set_int32(lookup_xdata, CTR_ATTACH_TIER_LOOKUP, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to set lookup flag");
- goto out;
- }
-
- gf_uuid_copy(file_loc.parent->gfid, parent_loc->gfid);
-
- /* Sending lookup to cold tier only */
- ret = syncop_lookup(conf->subvolumes[0], &file_loc, &iatt, NULL,
- lookup_xdata, NULL);
- if (ret) {
- /* If the file does not exist on the cold tier than it must */
- /* have been discovered on the hot tier. This is not an error. */
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "%s lookup to cold tier on attach heal failed", file_loc.path);
- goto out;
- }
-
- ret = 0;
-
-out:
-
- loc_wipe(&file_loc);
-
- if (lookup_xdata)
- dict_unref(lookup_xdata);
-
- return ret;
-}
-
int
gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *fix_layout, dict_t *migrate_data)
@@ -3687,7 +3580,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
};
inode_t *linked_inode = NULL, *inode = NULL;
dht_conf_t *conf = NULL;
- int should_commit_hash = 1;
int perrno = 0;
conf = this->private;
@@ -3790,16 +3682,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
if (!IA_ISDIR(entry->d_stat.ia_type)) {
- /* If its a fix layout during the attach
- * tier operation do lookups on files
- * on cold subvolume so that there is a
- * CTR DB Lookup Heal triggered on existing
- * data.
- * */
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
- gf_fix_layout_tier_attach_lookup(this, loc, entry);
- }
-
continue;
}
loc_wipe(&entry_loc);
@@ -3816,8 +3698,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
goto out;
} else {
- should_commit_hash = 0;
-
continue;
}
}
@@ -3880,7 +3760,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = -1;
goto out;
} else {
- should_commit_hash = 0;
continue;
}
}
@@ -3893,7 +3772,12 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout,
migrate_data);
- if (ret && ret != 2) {
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED ||
+ defrag->defrag_status == GF_DEFRAG_STATUS_FAILED) {
+ goto out;
+ }
+
+ if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED,
"Fix layout failed for %s", entry_loc.path);
@@ -3916,7 +3800,25 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
INIT_LIST_HEAD(&entries.list);
}
+ /* A directory layout is fixed only after its subdirs are healed to
+ * any newly added bricks. If the layout is fixed before subdirs are
+ * healed, the newly added brick will get a non-null layout.
+ * Any subdirs which hash to that layout will no longer show up
+ * in a directory listing until they are healed.
+ */
+
ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL);
+
+    /* In case of a race where the directory is deleted just before the
+     * layout setxattr, the errors are recorded in the layout structure.
+     * We can use this information to decide whether the directory has
+     * been deleted entirely.
+     */
+ if (ret == 0) {
+ ret = dht_dir_layout_error_check(this, loc->inode);
+ ret = -ret;
+ }
+
if (ret) {
if (-ret == ENOENT || -ret == ESTALE) {
gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
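
dht_dir_layout_error_check() itself is not part of this hunk; from the
comment above and the ENOENT/ESTALE handling that follows, it presumably
scans the per-subvolume error fields recorded in the inode's layout and
returns a representative errno when the directory has vanished. A hedged
sketch of that idea (the real helper lives elsewhere in the tree):

    /* Sketch only: return an errno recorded in the layout, 0 otherwise. */
    static int
    dir_layout_error_check_sketch(xlator_t *this, inode_t *inode)
    {
        dht_layout_t *layout = dht_layout_get(this, inode);
        int i, err = 0;

        if (!layout)
            return 0;

        for (i = 0; i < layout->cnt; i++) {
            if (layout->list[i].err == ENOENT ||
                layout->list[i].err == ESTALE)
                err = layout->list[i].err; /* dir gone on this subvol */
        }

        dht_layout_unref(this, layout); /* drop the ref taken above */
        return err;
    }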
@@ -3927,6 +3829,7 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
defrag->total_failures++;
}
ret = 0;
+ goto out;
} else {
gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
"Setxattr failed for %s", loc->path);
@@ -3941,11 +3844,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
}
}
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) {
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno);
- if (ret && (ret != 2)) {
+ if (ret) {
if (perrno == ENOENT || perrno == ESTALE) {
ret = 0;
goto out;
@@ -3961,18 +3863,13 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
if (conf->decommission_in_progress) {
goto out;
}
-
- should_commit_hash = 0;
}
- } else if (ret == 2) {
- should_commit_hash = 0;
}
}
gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path);
- if (should_commit_hash &&
- gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) {
+ if (gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) {
defrag->total_failures++;
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED,
@@ -3996,245 +3893,34 @@ out:
if (fd)
fd_unref(fd);
- if (ret == 0 && should_commit_hash == 0) {
- ret = 2;
- }
-
return ret;
}
-/******************************************************************************
- * Tier background Fix layout functions
- ******************************************************************************/
-/* This is the background tier fixlayout thread */
-void *
-gf_tier_do_fix_layout(void *args)
-{
- gf_tier_fix_layout_arg_t *tier_fix_layout_arg = args;
- int ret = -1;
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- dict_t *dict = NULL;
- loc_t loc = {
- 0,
- };
- struct iatt iatt = {
- 0,
- };
- struct iatt parent = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO("tier", tier_fix_layout_arg, out);
- GF_VALIDATE_OR_GOTO("tier", tier_fix_layout_arg->this, out);
- this = tier_fix_layout_arg->this;
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
- GF_VALIDATE_OR_GOTO(this->name, defrag->root_inode, out);
-
- GF_VALIDATE_OR_GOTO(this->name, tier_fix_layout_arg->fix_layout, out);
-
- /* Get Root loc_t */
- dht_build_root_loc(defrag->root_inode, &loc);
- ret = syncop_lookup(this, &loc, &iatt, &parent, NULL, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_START_FAILED,
- "Lookup on root failed.");
- ret = -1;
- goto out;
- }
-
- /* Start the crawl */
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Tiering Fixlayout started");
-
- ret = gf_defrag_fix_layout(this, defrag, &loc,
- tier_fix_layout_arg->fix_layout, NULL);
- if (ret && ret != 2) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED,
- "Tiering fixlayout failed.");
- ret = -1;
- goto out;
- }
-
- if (ret != 2 &&
- gf_defrag_settle_hash(this, defrag, &loc,
- tier_fix_layout_arg->fix_layout) != 0) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
-
- dict = dict_new();
- if (!dict) {
- ret = -1;
- goto out;
- }
-
- ret = dict_set_str(dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, "yes");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED,
- "Failed to set dictionary value: key = %s",
- GF_XATTR_TIER_LAYOUT_FIXED_KEY);
- ret = -1;
- goto out;
- }
-
- /* Marking the completion of tiering fix layout via a xattr on root */
- ret = syncop_setxattr(this, &loc, dict, 0, NULL, NULL);
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR,
- "Failed to set tiering fix "
- "layout completed xattr on %s",
- loc.path);
- ret = -1;
- goto out;
- }
-
- ret = 0;
-out:
- if (ret && defrag)
- defrag->total_failures++;
-
- if (dict)
- dict_unref(dict);
-
- return NULL;
-}
-
-int
-gf_tier_start_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag,
- dict_t *fix_layout)
-{
- int ret = -1;
- dict_t *tier_dict = NULL;
- gf_tier_fix_layout_arg_t *tier_fix_layout_arg = NULL;
-
- tier_dict = dict_new();
- if (!tier_dict) {
- gf_log("tier", GF_LOG_ERROR,
- "Tier fix layout failed :"
- "Creation of tier_dict failed");
- ret = -1;
- goto out;
- }
-
- /* Check if layout is fixed already */
- ret = syncop_getxattr(this, loc, &tier_dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY,
- NULL, NULL);
- if (ret != 0) {
- tier_fix_layout_arg = &defrag->tier_conf.tier_fix_layout_arg;
-
- /*Fill crawl arguments */
- tier_fix_layout_arg->this = this;
- tier_fix_layout_arg->fix_layout = fix_layout;
-
- /* Spawn the fix layout thread so that its done in the
- * background */
- ret = gf_thread_create(&tier_fix_layout_arg->thread_id, NULL,
- gf_tier_do_fix_layout, tier_fix_layout_arg,
- "tierfixl");
- if (ret) {
- gf_log("tier", GF_LOG_ERROR,
- "Thread creation failed. "
- "Background fix layout for tiering will not "
- "work.");
- defrag->total_failures++;
- goto out;
- }
- }
- ret = 0;
-out:
- if (tier_dict)
- dict_unref(tier_dict);
-
- return ret;
-}
-
-void
-gf_tier_clear_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
-{
- int ret = -1;
- dict_t *dict = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, loc, out);
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
-
- /* Check if background fixlayout is completed. This is not
- * multi-process safe i.e there is a possibility that by the time
- * we move to remove the xattr there it might have been cleared by some
- * other detach process from other node. We ignore the error if such
- * a thing happens */
- ret = syncop_getxattr(this, loc, &dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY,
- NULL, NULL);
- if (ret) {
- /* Background fixlayout not complete - nothing to clear*/
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS,
- "Unable to retrieve fixlayout xattr."
- "Assume background fix layout not complete");
- goto out;
- }
-
- ret = syncop_removexattr(this, loc, GF_XATTR_TIER_LAYOUT_FIXED_KEY, NULL,
- NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS,
- "Failed removing tier fix layout "
- "xattr from %s",
- loc->path);
- goto out;
- }
- ret = 0;
-out:
- if (dict)
- dict_unref(dict);
-}
-
-void
-gf_tier_wait_fix_lookup(gf_defrag_info_t *defrag)
-{
- if (defrag->tier_conf.tier_fix_layout_arg.thread_id) {
- pthread_join(defrag->tier_conf.tier_fix_layout_arg.thread_id, NULL);
- }
-}
-/******************Tier background Fix layout functions END********************/
-
int
dht_init_local_subvols_and_nodeuuids(xlator_t *this, dht_conf_t *conf,
loc_t *loc)
{
dict_t *dict = NULL;
- gf_defrag_info_t *defrag = NULL;
uuid_t *uuid_ptr = NULL;
int ret = -1;
int i = 0;
int j = 0;
- defrag = conf->defrag;
-
- if (defrag->cmd != GF_DEFRAG_CMD_START_TIER) {
- /* Find local subvolumes */
- ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL,
- NULL, NULL);
- if (ret && (ret != -ENODATA)) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
- "local "
- "subvolume determination failed with error: %d",
- -ret);
- ret = -1;
- goto out;
- }
-
- if (!ret)
- goto out;
+ /* Find local subvolumes */
+ ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, NULL,
+ NULL);
+ if (ret && (ret != -ENODATA)) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
+ "local "
+ "subvolume determination failed with error: %d",
+ -ret);
+ ret = -1;
+ goto out;
}
+ if (!ret)
+ goto out;
+
ret = syncop_getxattr(this, loc, &dict, GF_REBAL_OLD_FIND_LOCAL_SUBVOL,
NULL, NULL);
if (ret) {
@@ -4325,9 +4011,6 @@ dht_file_counter_thread(void *args)
struct timespec time_to_wait = {
0,
};
- struct timeval now = {
- 0,
- };
uint64_t tmp_size = 0;
if (!args)
@@ -4337,9 +4020,8 @@ dht_file_counter_thread(void *args)
dht_build_root_loc(defrag->root_inode, &root_loc);
while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
- gettimeofday(&now, NULL);
- time_to_wait.tv_sec = now.tv_sec + 600;
- time_to_wait.tv_nsec = 0;
+ timespec_now(&time_to_wait);
+ time_to_wait.tv_sec += 600;
pthread_mutex_lock(&defrag->fc_mutex);
pthread_cond_timedwait(&defrag->fc_wakeup_cond, &defrag->fc_mutex,
@@ -4412,7 +4094,7 @@ gf_defrag_estimates_init(xlator_t *this, loc_t *loc, pthread_t *filecnt_thread)
goto out;
}
- ret = gf_thread_create(filecnt_thread, NULL, &dht_file_counter_thread,
+ ret = gf_thread_create(filecnt_thread, NULL, dht_file_counter_thread,
(void *)defrag, "dhtfcnt");
if (ret) {
@@ -4469,7 +4151,7 @@ gf_defrag_parallel_migration_init(xlator_t *this, gf_defrag_info_t *defrag,
/*Spawn Threads Here*/
while (index < thread_spawn_count) {
- ret = gf_thread_create(&(tid[index]), NULL, &gf_defrag_task,
+ ret = gf_thread_create(&(tid[index]), NULL, gf_defrag_task,
(void *)defrag, "dhtmig%d", (index + 1) & 0x3ff);
if (ret != 0) {
gf_msg("DHT", GF_LOG_ERROR, ret, 0, "Thread[%d] creation failed. ",
@@ -4543,7 +4225,6 @@ gf_defrag_start_crawl(void *data)
dict_t *migrate_data = NULL;
dict_t *status = NULL;
glusterfs_ctx_t *ctx = NULL;
- dht_methods_t *methods = NULL;
call_frame_t *statfs_frame = NULL;
xlator_t *old_THIS = NULL;
int ret = -1;
@@ -4559,7 +4240,6 @@ gf_defrag_start_crawl(void *data)
int thread_index = 0;
pthread_t *tid = NULL;
pthread_t filecnt_thread;
- gf_boolean_t is_tier_detach = _gf_false;
gf_boolean_t fc_thread_started = _gf_false;
this = data;
@@ -4578,7 +4258,8 @@ gf_defrag_start_crawl(void *data)
if (!defrag)
goto exit;
- gettimeofday(&defrag->start_time, NULL);
+ defrag->start_time = gf_time();
+
dht_build_root_inode(this, &defrag->root_inode);
if (!defrag->root_inode)
goto out;
@@ -4712,43 +4393,17 @@ gf_defrag_start_crawl(void *data)
}
}
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
- /* Fix layout for attach tier */
- ret = gf_tier_start_fix_layout(this, &loc, defrag, fix_layout);
- if (ret) {
- goto out;
- }
-
- methods = &(conf->methods);
-
- /* Calling tier_start of tier.c */
- methods->migration_other(this, defrag);
- if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
- defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
- ret = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY,
- "force");
- if (ret)
- goto out;
- }
- } else {
- ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout,
- migrate_data);
- if (ret && ret != 2) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
-
- if (ret != 2 &&
- gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
+ ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, migrate_data);
+ if (ret) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
- if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
- defrag->cmd == GF_DEFRAG_CMD_DETACH_START)
- is_tier_detach = _gf_true;
+ if (gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
}
gf_log("DHT", GF_LOG_INFO, "crawling file-system completed");
@@ -4762,19 +4417,6 @@ out:
defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
}
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
- /* Wait for the tier fixlayout to
- * complete if its was started.*/
- gf_tier_wait_fix_lookup(defrag);
- }
-
- if (is_tier_detach && ret == 0) {
- /* If it was a detach remove the tier fix-layout
- * xattr on root. Ignoring the failure, as nothing has to be
- * done, logging is done in gf_tier_clear_fix_layout */
- gf_tier_clear_fix_layout(this, &loc, defrag);
- }
-
gf_defrag_parallel_migration_cleanup(defrag, tid, thread_index);
if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) &&
@@ -4788,9 +4430,9 @@ out:
dht_send_rebalance_event(this, defrag->cmd, defrag->defrag_status);
+ status = dict_new();
LOCK(&defrag->lock);
{
- status = dict_new();
gf_defrag_status_get(conf, status);
if (ctx && ctx->notify)
ctx->notify(GF_EN_DEFRAG_STATUS, status);
@@ -4873,9 +4515,6 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf)
uint64_t total_processed = 0;
uint64_t tmp_count = 0;
uint64_t time_to_complete = 0;
- struct timeval now = {
- 0,
- };
double elapsed = 0;
defrag = conf->defrag;
@@ -4883,8 +4522,7 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf)
if (!g_totalsize)
goto out;
- gettimeofday(&now, NULL);
- elapsed = now.tv_sec - defrag->start_time.tv_sec;
+ elapsed = gf_time() - defrag->start_time;
/* Don't calculate the estimates for the first 10 minutes.
* It is unlikely to be accurate and estimates are not required
@@ -4934,13 +4572,8 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
uint64_t lookup = 0;
uint64_t failures = 0;
uint64_t skipped = 0;
- uint64_t promoted = 0;
- uint64_t demoted = 0;
char *status = "";
double elapsed = 0;
- struct timeval end = {
- 0,
- };
uint64_t time_to_complete = 0;
uint64_t time_left = 0;
gf_defrag_info_t *defrag = conf->defrag;
@@ -4957,17 +4590,12 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
lookup = defrag->num_files_lookedup;
failures = defrag->total_failures;
skipped = defrag->skipped;
- promoted = defrag->total_files_promoted;
- demoted = defrag->total_files_demoted;
-
- gettimeofday(&end, NULL);
- elapsed = end.tv_sec - defrag->start_time.tv_sec;
+ elapsed = gf_time() - defrag->start_time;
/* The rebalance is still in progress */
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) {
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
time_to_complete = gf_defrag_get_estimates_based_on_size(conf);
if (time_to_complete && (time_to_complete > elapsed))
@@ -4982,14 +4610,6 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
if (!dict)
goto log;
- ret = dict_set_uint64(dict, "promoted", promoted);
- if (ret)
- gf_log(THIS->name, GF_LOG_WARNING, "failed to set promoted count");
-
- ret = dict_set_uint64(dict, "demoted", demoted);
- if (ret)
- gf_log(THIS->name, GF_LOG_WARNING, "failed to set demoted count");
-
ret = dict_set_uint64(dict, "files", files);
if (ret)
gf_log(THIS->name, GF_LOG_WARNING, "failed to set file count");
@@ -5055,159 +4675,6 @@ out:
return 0;
}
-void
-gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state)
-{
- pthread_mutex_lock(&tier_conf->pause_mutex);
- tier_conf->pause_state = state;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-}
-
-tier_pause_state_t
-gf_defrag_get_pause_state(gf_tier_conf_t *tier_conf)
-{
- int state;
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
- state = tier_conf->pause_state;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- return state;
-}
-
-tier_pause_state_t
-gf_defrag_check_pause_tier(gf_tier_conf_t *tier_conf)
-{
- int woke = 0;
- int state = -1;
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
-
- if (tier_conf->pause_state == TIER_RUNNING)
- goto out;
-
- if (tier_conf->pause_state == TIER_PAUSED)
- goto out;
-
- if (tier_conf->promote_in_progress || tier_conf->demote_in_progress)
- goto out;
-
- tier_conf->pause_state = TIER_PAUSED;
-
- if (tier_conf->pause_synctask) {
- synctask_wake(tier_conf->pause_synctask);
- tier_conf->pause_synctask = 0;
- woke = 1;
- }
-
- gf_msg("tier", GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, "woken %d", woke);
-
- gf_event(EVENT_TIER_PAUSE, "vol=%s", tier_conf->volname);
-out:
- state = tier_conf->pause_state;
-
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- return state;
-}
-
-void
-gf_defrag_pause_tier_timeout(void *data)
-{
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
-
- this = (xlator_t *)data;
- GF_VALIDATE_OR_GOTO("tier", this, out);
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
-
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED,
- "Request pause timer timeout");
-
- gf_defrag_check_pause_tier(&defrag->tier_conf);
-
-out:
- return;
-}
-
-int
-gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag)
-{
- int ret = 0;
- struct timespec delta = {
- 0,
- };
- int delay = 2;
-
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED)
- goto out;
-
- /*
- * Set flag requesting to pause tiering. Wait 'delay' seconds for
- * tiering to actually stop as indicated by the pause state
- * before returning success or failure.
- */
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE);
-
- /*
- * If migration is not underway, can pause immediately.
- */
- gf_defrag_check_pause_tier(&defrag->tier_conf);
- if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED)
- goto out;
-
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED,
- "Request pause tier");
-
- defrag->tier_conf.pause_synctask = synctask_get();
- delta.tv_sec = delay;
- delta.tv_nsec = 0;
- defrag->tier_conf.pause_timer = gf_timer_call_after(
- this->ctx, delta, gf_defrag_pause_tier_timeout, this);
-
- synctask_yield(defrag->tier_conf.pause_synctask);
-
- if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED)
- goto out;
-
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING);
-
- ret = -1;
-out:
-
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED,
- "Pause tiering ret=%d", ret);
-
- return ret;
-}
-
-int
-gf_defrag_resume_tier(xlator_t *this, gf_defrag_info_t *defrag)
-{
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_RESUME,
- "Pause end. Resume tiering");
-
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING);
-
- gf_event(EVENT_TIER_RESUME, "vol=%s", defrag->tier_conf.volname);
-
- return 0;
-}
-
-int
-gf_defrag_start_detach_tier(gf_defrag_info_t *defrag)
-{
- defrag->cmd = GF_DEFRAG_CMD_START_DETACH_TIER;
-
- return 0;
-}
-
int
gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output)
{
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
index 5ba2373484a..d9dbf50492f 100644
--- a/xlators/cluster/dht/src/dht-rename.c
+++ b/xlators/cluster/dht/src/dht-rename.c
@@ -11,8 +11,6 @@
/* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should
* delete the newpath if it gets EEXISTS from link() call.
*/
-#include <glusterfs/glusterfs.h>
-#include <glusterfs/xlator.h>
#include "dht-common.h"
#include "dht-lock.h"
#include <glusterfs/defaults.h>
@@ -505,6 +503,8 @@ dht_order_rename_lock(call_frame_t *frame, loc_t **loc, xlator_t **subvol)
uuid_utoa_r(local->loc.pargfid, src);
else if (local->loc.parent)
uuid_utoa_r(local->loc.parent->gfid, src);
+ else
+ src[0] = '\0';
strcat(src, local->loc.name);
@@ -520,6 +520,8 @@ dht_order_rename_lock(call_frame_t *frame, loc_t **loc, xlator_t **subvol)
uuid_utoa_r(local->loc2.pargfid, dst);
else if (local->loc2.parent)
uuid_utoa_r(local->loc2.parent->gfid, dst);
+ else
+ dst[0] = '\0';
strcat(dst, local->loc2.name);
ret = strcmp(src, dst);
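
The new else branches ensure src and dst begin NUL-terminated even when a loc
carries neither a pargfid nor a parent inode; previously strcat() could
append to uninitialized stack memory. The surrounding function implements
standard deadlock avoidance: each rename participant is keyed by a
pargfid-plus-basename string, and locks are acquired in strcmp() order so two
crossing renames always lock in the same sequence. A condensed sketch
(build_key() and lock_in_order() are hypothetical stand-ins):

    char src[128] = {0};
    char dst[128] = {0};

    build_key(src, sizeof(src), &local->loc);  /* pargfid string + name */
    build_key(dst, sizeof(dst), &local->loc2);

    if (strcmp(src, dst) > 0)
        lock_in_order(&local->loc2, &local->loc); /* lower key locks first */
    else
        lock_in_order(&local->loc, &local->loc2);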
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index f5dfff9a11f..3e24065227c 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -8,12 +8,7 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/glusterfs.h>
-#include <glusterfs/xlator.h>
-#include "dht-common.h"
-#include "dht-messages.h"
#include "dht-lock.h"
-#include <glusterfs/glusterfs-acl.h>
#define DHT_SET_LAYOUT_RANGE(layout, i, srt, chunk, path) \
do { \
@@ -22,7 +17,7 @@
layout->list[i].commit_hash = layout->commit_hash; \
\
gf_msg_trace(this->name, 0, \
- "gave fix: %u - %u, with commit-hash %u" \
+ "gave fix: 0x%x - 0x%x, with commit-hash 0x%x" \
" on %s for %s", \
layout->list[i].start, layout->list[i].stop, \
layout->list[i].commit_hash, \
@@ -38,7 +33,7 @@
} \
} while (0)
-int
+static int
dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
gf_boolean_t newdir, dht_selfheal_layout_t healer,
dht_need_heal_t should_heal);
@@ -149,8 +144,8 @@ dht_refresh_layout_done(call_frame_t *frame)
ret = dht_layout_sort(refreshed);
if (ret == -1) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED,
- "sorting the layout failed");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_SORT_FAILED, NULL);
goto err;
}
@@ -206,10 +201,9 @@ dht_refresh_layout_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
gf_uuid_unparse(local->loc.gfid, gfid);
local->op_errno = op_errno;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_FILE_LOOKUP_FAILED,
- "lookup of %s on %s returned error, gfid: %s",
- local->loc.path, prev->name, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_FILE_LOOKUP_FAILED, "path=%s", local->loc.path,
+ "name=%s", prev->name, "gfid=%s", gfid, NULL);
goto unlock;
}
@@ -270,9 +264,8 @@ dht_refresh_layout(call_frame_t *frame)
conf->subvolume_cnt);
if (!local->selfheal.refreshed_layout) {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation for layout failed, path:%s gfid:%s",
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
@@ -284,9 +277,8 @@ dht_refresh_layout(call_frame_t *frame)
gf_uuid_unparse(local->loc.gfid, gfid);
local->xattr_req = dict_new();
if (local->xattr_req == NULL) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "dict mem allocation failed, path:%s gfid:%s",
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
}
@@ -294,9 +286,9 @@ dht_refresh_layout(call_frame_t *frame)
if (dict_get(local->xattr_req, conf->xattr_name) == 0) {
ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:key = %s",
- local->loc.path, conf->xattr_name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", local->loc.path, "key=%s", conf->xattr_name,
+ NULL);
}
for (i = 0; i < call_cnt; i++) {
@@ -529,7 +521,7 @@ out:
return fixit;
}
-int
+static int
dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
gf_boolean_t newdir, dht_selfheal_layout_t healer,
dht_need_heal_t should_heal)
@@ -561,10 +553,8 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char);
if (lk_array == NULL) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
- gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation failed for "
- "lk_array, gfid:%s path: %s",
- gfid, local->loc.path);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
goto err;
}
@@ -574,10 +564,9 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
DHT_LAYOUT_HEAL_DOMAIN, NULL, FAIL_ON_ANY_ERROR);
if (lk_array[i] == NULL) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation "
- "failed for lk_array, gfid:%s path:%s",
- gfid, local->loc.path);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_MEM_ALLOC_FAILED, "lk_array-gfid=%s", gfid,
+ "path=%s", local->loc.path, NULL);
goto err;
}
}
@@ -586,10 +575,8 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char);
if (lk_array == NULL) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation failed for "
- "lk_array, gfid:%s path:%s",
- gfid, local->loc.path);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
goto err;
}
@@ -598,10 +585,8 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
NULL, FAIL_ON_ANY_ERROR);
if (lk_array[0] == NULL) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation failed for "
- "lk_array, gfid:%s path:%s",
- gfid, local->loc.path);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
goto err;
}
}
@@ -627,7 +612,7 @@ err:
return -1;
}
-int
+static int
dht_selfheal_dir_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -649,10 +634,9 @@ dht_selfheal_dir_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
err = 0;
} else {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "layout setxattr failed on %s, path:%s gfid:%s", subvol->name,
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "name=%s", subvol->name,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
err = op_errno;
}
@@ -699,7 +683,7 @@ dht_set_user_xattr(dict_t *dict, char *k, data_t *v, void *data)
return ret;
}
-int
+static int
dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
dht_layout_t *layout, int i,
xlator_t *req_subvol)
@@ -741,19 +725,17 @@ dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
ret = dict_set_str(xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: key = %s,"
- " gfid = %s",
- loc->path, GLUSTERFS_INTERNAL_FOP_KEY, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", GLUSTERFS_INTERNAL_FOP_KEY,
+ "gfid=%s", gfid, NULL);
goto err;
}
ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: key = %s,"
- " gfid = %s",
- loc->path, DHT_IATT_IN_XDATA_KEY, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", DHT_IATT_IN_XDATA_KEY,
+ "gfid=%s", gfid, NULL);
goto err;
}
@@ -761,27 +743,27 @@ dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
ret = dht_disk_layout_extract(this, layout, i, &disk_layout);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory self heal xattr failed:"
- " %s: (subvol %s) Failed to extract disk layout,"
- " gfid = %s",
- loc->path, subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+ "extract-disk-layout-failed, path=%s", loc->path, "subvol=%s",
+ subvol->name, "gfid=%s", gfid, NULL);
goto err;
}
ret = dict_set_bin(xattr, conf->xattr_name, disk_layout, 4 * 4);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory self heal xattr failed:"
- "%s: (subvol %s) Failed to set xattr dictionary,"
- " gfid = %s",
- loc->path, subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "path=%s", loc->path,
+ "subvol=%s", subvol->name,
+ "set-xattr-dictionary-failed"
+ "gfid=%s",
+ gfid, NULL);
goto err;
}
disk_layout = NULL;
gf_msg_trace(this->name, 0,
- "setting hash range %u - %u (type %d) on subvolume %s"
+ "setting hash range 0x%x - 0x%x (type %d) on subvolume %s"
" for %s",
layout->list[i].start, layout->list[i].stop, layout->type,
subvol->name, loc->path);
@@ -791,20 +773,17 @@ dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
if (data) {
ret = dict_add(xattr, QUOTA_LIMIT_KEY, data);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = %s",
- loc->path, QUOTA_LIMIT_KEY);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", QUOTA_LIMIT_KEY, NULL);
}
}
data = dict_get(local->xattr, QUOTA_LIMIT_OBJECTS_KEY);
if (data) {
ret = dict_add(xattr, QUOTA_LIMIT_OBJECTS_KEY, data);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = %s",
- loc->path, QUOTA_LIMIT_OBJECTS_KEY);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", QUOTA_LIMIT_OBJECTS_KEY,
+ NULL);
}
}
}
@@ -833,7 +812,7 @@ err:
return 0;
}
-int
+static int
dht_fix_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
{
dht_local_t *local = NULL;
@@ -882,7 +861,7 @@ out:
return 0;
}
-int
+static int
dht_selfheal_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
{
dht_local_t *local = NULL;
@@ -942,9 +921,8 @@ dht_selfheal_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
dummy = dht_layout_new(this, 1);
if (!dummy) {
gf_uuid_unparse(loc->gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "failed to allocate dummy layout, path:%s gfid:%s", loc->path,
- gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DUMMY_ALLOC_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
goto out;
}
for (i = 0; i < conf->subvolume_cnt && missing_xattr; i++) {
@@ -960,38 +938,6 @@ out:
return 0;
}
-gf_boolean_t
-dht_is_subvol_part_of_layout(dht_layout_t *layout, xlator_t *xlator)
-{
- int i = 0;
- gf_boolean_t ret = _gf_false;
-
- for (i = 0; i < layout->cnt; i++) {
- if (!strcmp(layout->list[i].xlator->name, xlator->name)) {
- ret = _gf_true;
- break;
- }
- }
-
- return ret;
-}
-
-int
-dht_layout_index_from_conf(dht_layout_t *layout, xlator_t *xlator)
-{
- int i = -1;
- int j = 0;
-
- for (j = 0; j < layout->cnt; j++) {
- if (!strcmp(layout->list[j].xlator->name, xlator->name)) {
- i = j;
- break;
- }
- }
-
- return i;
-}
-
int
dht_selfheal_dir_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *statpre,
@@ -1082,7 +1028,7 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
return 0;
}
-int
+static int
dht_selfheal_dir_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
@@ -1112,11 +1058,10 @@ dht_selfheal_dir_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret) {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name,
- ((op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_WARNING), op_errno,
- DHT_MSG_DIR_SELFHEAL_FAILED,
- "Directory selfheal failed: path = %s, gfid = %s",
- local->loc.path, gfid);
+ gf_smsg(this->name,
+ ((op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_WARNING),
+ op_errno, DHT_MSG_DIR_SELFHEAL_FAILED, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
dht_iatt_merge(this, &local->preparent, preparent);
@@ -1135,89 +1080,7 @@ out:
return 0;
}
-void
-dht_selfheal_dir_mkdir_setacl(dict_t *xattr, dict_t *dict)
-{
- data_t *acl_default = NULL;
- data_t *acl_access = NULL;
- xlator_t *this = NULL;
- int ret = -1;
-
- GF_ASSERT(xattr);
- GF_ASSERT(dict);
-
- this = THIS;
- GF_ASSERT(this);
-
- acl_default = dict_get(xattr, POSIX_ACL_DEFAULT_XATTR);
-
- if (!acl_default) {
- gf_msg_debug(this->name, 0, "ACL_DEFAULT xattr not present");
- goto cont;
- }
- ret = dict_set(dict, POSIX_ACL_DEFAULT_XATTR, acl_default);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s",
- POSIX_ACL_DEFAULT_XATTR);
-cont:
- acl_access = dict_get(xattr, POSIX_ACL_ACCESS_XATTR);
- if (!acl_access) {
- gf_msg_debug(this->name, 0, "ACL_ACCESS xattr not present");
- goto out;
- }
- ret = dict_set(dict, POSIX_ACL_ACCESS_XATTR, acl_access);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s",
- POSIX_ACL_ACCESS_XATTR);
-
-out:
- return;
-}
-
-void
-dht_selfheal_dir_mkdir_setquota(dict_t *src, dict_t *dst)
-{
- data_t *quota_limit_key = NULL;
- data_t *quota_limit_obj_key = NULL;
- xlator_t *this = NULL;
- int ret = -1;
-
- GF_ASSERT(src);
- GF_ASSERT(dst);
-
- this = THIS;
- GF_ASSERT(this);
-
- quota_limit_key = dict_get(src, QUOTA_LIMIT_KEY);
- if (!quota_limit_key) {
- gf_msg_debug(this->name, 0, "QUOTA_LIMIT_KEY xattr not present");
- goto cont;
- }
- ret = dict_set(dst, QUOTA_LIMIT_KEY, quota_limit_key);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s", QUOTA_LIMIT_KEY);
-
-cont:
- quota_limit_obj_key = dict_get(src, QUOTA_LIMIT_OBJECTS_KEY);
- if (!quota_limit_obj_key) {
- gf_msg_debug(this->name, 0,
- "QUOTA_LIMIT_OBJECTS_KEY xattr not present");
- goto out;
- }
- ret = dict_set(dst, QUOTA_LIMIT_OBJECTS_KEY, quota_limit_obj_key);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s",
- QUOTA_LIMIT_OBJECTS_KEY);
-
-out:
- return;
-}
-
-int
+static int
dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this)
{
dht_local_t *local = NULL;
@@ -1241,10 +1104,8 @@ dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this)
ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = gfid-req",
- loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=gfid-req", NULL);
} else if (local->params) {
/* Send the dictionary from higher layers directly */
@@ -1256,18 +1117,15 @@ dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this)
dht_dir_set_heal_xattr(this, local, dict, local->xattr, NULL, NULL);
if (!dict) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "dict is NULL, need to make sure gfids are same");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_IS_NULL, NULL);
dict = dict_new();
if (!dict)
return -1;
}
ret = dict_set_flag(dict, GF_INTERNAL_CTX_KEY, GF_DHT_HEAL_DIR);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value for"
- " key = %s at path: %s",
- GF_INTERNAL_CTX_KEY, loc->path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, "key=%s",
+ GF_INTERNAL_CTX_KEY, "path=%s", loc->path, NULL);
/* We can still continue. As heal can still happen
* unless quota limits have reached for the dir.
*/
@@ -1299,7 +1157,7 @@ err:
return 0;
}
-int
+static int
dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
@@ -1393,7 +1251,7 @@ err:
return 0;
}
-int
+static int
dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
@@ -1413,19 +1271,14 @@ dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie,
local->call_cnt = conf->subvolume_cnt;
if (op_ret < 0) {
- /* We get this error when the directory entry was not created
- * on a newky attached tier subvol. Hence proceed and do mkdir
- * on the tier subvol.
- */
if (op_errno == EINVAL) {
local->call_cnt = 1;
dht_selfheal_dir_mkdir_lookup_done(frame, this);
return 0;
}
- gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_ENTRYLK_ERROR,
- "acquiring entrylk after inodelk failed for %s",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_ENTRYLK_ERROR,
+ "path=%s", local->loc.path, NULL);
local->op_errno = op_errno;
goto err;
@@ -1439,10 +1292,8 @@ dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie,
ret = dict_set_int32(local->xattr_req, "list-xattr", 1);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary key list-xattr value "
- " for path %s ",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, "path=%s",
+ local->loc.path, NULL);
for (i = 0; i < conf->subvolume_cnt; i++) {
if (mds_subvol && conf->subvolumes[i] == mds_subvol) {
@@ -1465,18 +1316,21 @@ err:
return 0;
}
-int
+static int
dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
int force)
{
int missing_dirs = 0;
int i = 0;
+ int op_errno = 0;
int ret = -1;
dht_local_t *local = NULL;
xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
local = frame->local;
this = frame->this;
+ conf = this->private;
local->selfheal.force_mkdir = force;
local->selfheal.hole_cnt = 0;
@@ -1493,13 +1347,12 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
if (!__is_root_gfid(local->stbuf.ia_gfid)) {
if (local->need_xattr_heal) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "%s:xattr heal failed for "
- "directory (gfid = %s)",
- local->loc.path, local->gfid);
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s",
+ local->loc.path, "gfid=%s", local->gfid, NULL);
+ }
} else {
if (!gf_uuid_is_null(local->gfid))
gf_uuid_copy(loc->gfid, local->gfid);
@@ -1508,28 +1361,53 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
if (!ret)
return 0;
- gf_msg(this->name, GF_LOG_INFO, 0,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "%s: Failed to set mds xattr "
- "for directory (gfid = %s)",
- local->loc.path, local->gfid);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_SET_XATTR_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", local->gfid,
+ NULL);
}
}
dht_selfheal_dir_setattr(frame, loc, &local->stbuf, 0xffffffff, layout);
return 0;
}
- if (local->hashed_subvol == NULL)
- local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ /* The MDS xattr is populated only when DHT has more than one
+ subvol. After a graph switch that adds more dht subvols, treat the
+ hashed subvol as the MDS so that the MDS check does not fail when
+ a fop runs on the directory.
+ */
+ if (!dict_get(local->xattr, conf->mds_xattr_key) &&
+ (conf->subvolume_cnt > 1)) {
+ if (local->hashed_subvol == NULL) {
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s",
+ loc->pargfid, "name=%s", loc->name, "path=%s",
+ loc->path, NULL);
+ goto err;
+ }
+ }
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this,
+ local->hashed_subvol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s on inode vol is %s",
+ local->loc.path,
+ local->hashed_subvol ? local->hashed_subvol->name : "NULL");
+ goto err;
+ }
+ }
if (local->hashed_subvol == NULL) {
- local->op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
- DHT_MSG_HASHED_SUBVOL_GET_FAILED,
- "(%s/%s) (path: %s): "
- "hashed subvolume not found",
- loc->pargfid, loc->name, loc->path);
- goto err;
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid,
+ "name=%s", loc->name, "path=%s", loc->path, NULL);
+ goto err;
+ }
}
local->current = &local->lock[0];
@@ -1545,7 +1423,7 @@ err:
return -1;
}
-int
+static int
dht_selfheal_layout_alloc_start(xlator_t *this, loc_t *loc,
dht_layout_t *layout)
{
@@ -1641,7 +1519,7 @@ dht_get_layout_count(xlator_t *this, dht_layout_t *layout, int new_layout)
/* if layout->spread_cnt is set, check if it is <= available
* subvolumes (down brick and decommissioned bricks are considered
- * un-availbale). Else return count (available up bricks) */
+ * unavailable). Else return count (available up bricks) */
count = ((layout->spread_cnt && (layout->spread_cnt <= count))
? layout->spread_cnt
: ((count) ? count : 1));
@@ -1654,8 +1532,6 @@ dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc,
dht_layout_t *new_layout);
void
-dht_layout_entry_swap(dht_layout_t *layout, int i, int j);
-void
dht_layout_range_swap(dht_layout_t *layout, int i, int j);
/*
@@ -1664,7 +1540,7 @@ dht_layout_range_swap(dht_layout_t *layout, int i, int j);
*/
#define OV_ENTRY(x, y) table[x * new->cnt + y]
-void
+static void
dht_selfheal_layout_maximize_overlap(call_frame_t *frame, loc_t *loc,
dht_layout_t *new, dht_layout_t *old)
{
@@ -1741,7 +1617,7 @@ dht_selfheal_layout_maximize_overlap(call_frame_t *frame, loc_t *loc,
}
}
-dht_layout_t *
+static dht_layout_t *
dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
dht_layout_t *layout)
{
@@ -1766,9 +1642,8 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
new_layout = dht_layout_new(this, priv->subvolume_cnt);
if (!new_layout) {
gf_uuid_unparse(loc->gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation failed for new_layout, path:%s gfid:%s",
- loc->path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "new_layout, path=%s", loc->path, "gfid=%s", gfid, NULL);
goto done;
}
@@ -1778,10 +1653,9 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
if (subvol_down) {
gf_uuid_unparse(loc->gfid, gfid);
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_FIX_FAILED,
- "Layout fix failed: %u subvolume(s) are down"
- ". Skipping fix layout. path:%s gfid:%s",
- subvol_down, loc->path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_FIX_FAILED,
+ "subvol-down=%u", subvol_down, "Skipping-fix-layout", "path=%s",
+ loc->path, "gfid=%s", gfid, NULL);
GF_FREE(new_layout);
return NULL;
}
@@ -1799,10 +1673,10 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
if (priv->du_stats) {
for (i = 0; i < priv->subvolume_cnt; ++i) {
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_SUBVOL_INFO,
- "subvolume %d (%s): %u chunks, path:%s", i,
- priv->subvolumes[i]->name, priv->du_stats[i].chunks,
- loc->path);
+ gf_smsg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_SUBVOL_INFO,
+ "index=%d", i, "name=%s", priv->subvolumes[i]->name,
+ "chunks=%u", priv->du_stats[i].chunks, "path=%s", loc->path,
+ NULL);
/* Maximize overlap if the bricks are all the same
* size.
@@ -1814,8 +1688,8 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
}
}
} else {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_DISK_USAGE_STATUS,
- "no du stats ?!?");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_DISK_USAGE_STATUS,
+ NULL);
}
/* First give it a layout as though it is a new directory. This
@@ -1846,7 +1720,7 @@ done:
* Having to call this 2x for each entry in the layout is pretty horrible, but
* that's what all of this layout-sorting nonsense gets us.
*/
-uint32_t
+static uint32_t
dht_get_chunks_from_xl(xlator_t *parent, xlator_t *child)
{
dht_conf_t *priv = parent->private;
@@ -1964,7 +1838,7 @@ done:
return;
}
-int
+static int
dht_selfheal_dir_getafix(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
{
dht_local_t *local = NULL;
@@ -2023,9 +1897,8 @@ dht_selfheal_new_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
linked_inode = inode_link(loc->inode, loc->parent, loc->name,
&local->stbuf);
if (!linked_inode) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DIR_SELFHEAL_FAILED,
- "linking inode failed (%s/%s) => %s", pgfid, loc->name, gfid);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_LINK_INODE_FAILED,
+ "pgfid=%s", pgfid, "name=%s", loc->name, "gfid=%s", gfid, NULL);
ret = -1;
goto out;
}
@@ -2097,9 +1970,18 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
local->selfheal.dir_cbk = dir_cbk;
local->selfheal.layout = dht_layout_ref(this, layout);
- if (local->need_attrheal && !IA_ISINVAL(local->mds_stbuf.ia_type)) {
- /*Use the one in the mds_stbuf*/
- local->stbuf = local->mds_stbuf;
+ if (local->need_attrheal) {
+ if (__is_root_gfid(local->stbuf.ia_gfid)) {
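+ /* The root directory is healed from the prebuf: carry over
+ * ownership, mode and ctime. Other directories fall back to
+ * the MDS stbuf below when it is valid. */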
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+
+ local->stbuf.ia_ctime = local->prebuf.ia_ctime;
+ local->stbuf.ia_ctime_nsec = local->prebuf.ia_ctime_nsec;
+ local->stbuf.ia_prot = local->prebuf.ia_prot;
+
+ } else if (!IA_ISINVAL(local->mds_stbuf.ia_type)) {
+ local->stbuf = local->mds_stbuf;
+ }
}
if (!__is_root_gfid(local->stbuf.ia_gfid)) {
@@ -2109,9 +1991,9 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
linked_inode = inode_link(loc->inode, loc->parent, loc->name,
&local->stbuf);
if (!linked_inode) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED,
- "linking inode failed (%s/%s) => %s", pgfid, loc->name,
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LINK_INODE_FAILED,
+ "pgfid=%s", pgfid, "name=%s", loc->name, "gfid=%s", gfid,
+ NULL);
ret = 0;
goto sorry_no_fix;
}
@@ -2137,19 +2019,17 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
misc = local->selfheal.misc;
if (down) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED,
- "%s: Directory selfheal failed: %d subvolumes down."
- "Not fixing. gfid = %s",
- loc->path, down, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SELFHEAL_FAILED,
+ "path=%s", loc->path, "subvol-down=%d", down, "Not-fixing",
+ "gfid=%s", gfid, NULL);
ret = 0;
goto sorry_no_fix;
}
if (misc) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED,
- "%s: Directory selfheal failed : %d subvolumes "
- "have unrecoverable errors. gfid = %s",
- loc->path, misc, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SELFHEAL_FAILED,
+ "path=%s", loc->path, "misc=%d", misc, "unrecoverable-errors",
+ "gfid=%s", gfid, NULL);
ret = 0;
goto sorry_no_fix;
@@ -2235,29 +2115,28 @@ dht_dir_heal_xattrs(void *data)
gf_uuid_unparse(local->loc.gfid, gfid);
if (!mds_subvol) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "No mds subvol for %s gfid = %s", local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_MDS_SUBVOL, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
if ((local->loc.inode && gf_uuid_is_null(local->loc.inode->gfid)) ||
gf_uuid_is_null(local->loc.gfid)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "No gfid present so skip heal for path %s gfid = %s",
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_NOT_PRESENT,
+ "skip-heal path=%s", local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
internal_xattr = dict_new();
if (!internal_xattr) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
- "dictionary creation failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
goto out;
}
xdata = dict_new();
if (!xdata) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
- "dictionary creation failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
goto out;
}
@@ -2265,18 +2144,17 @@ dht_dir_heal_xattrs(void *data)
user_xattr = dict_new();
if (!user_xattr) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
- "dictionary creation failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
goto out;
}
ret = syncop_listxattr(local->mds_subvol, &local->loc, &mds_xattr, NULL,
NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "failed to list xattrs for "
- "%s: on %s ",
- local->loc.path, local->mds_subvol->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LIST_XATTRS_FAILED,
+ "path=%s", local->loc.path, "name=%s", local->mds_subvol->name,
+ NULL);
}
if (!mds_xattr)
@@ -2291,10 +2169,9 @@ dht_dir_heal_xattrs(void *data)
dict_get(user_xattr, QUOTA_LIMIT_OBJECTS_KEY)) {
ret = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value: key = %s,"
- " path = %s",
- GLUSTERFS_INTERNAL_FOP_KEY, local->loc.path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "key=%s", GLUSTERFS_INTERNAL_FOP_KEY, "path=%s",
+ local->loc.path, NULL);
goto out;
}
}
@@ -2306,16 +2183,25 @@ dht_dir_heal_xattrs(void *data)
if (subvol == mds_subvol)
continue;
if (uret || uflag) {
+ /* Custom xattr heal is required - let posix handle it */
+ ret = dict_set_int8(xdata, "sync_backend_xattrs", _gf_true);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", local->loc.path, "key=%s",
+ "sync_backend_xattrs", NULL);
+ goto out;
+ }
+
ret = syncop_setxattr(subvol, &local->loc, user_xattr, 0, xdata,
NULL);
if (ret) {
xattr_hashed = 1;
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "Directory xattr heal failed. Failed to set"
- "user xattr on path %s on "
- "subvol %s, gfid = %s ",
- local->loc.path, subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "set-user-xattr-failed path=%s", local->loc.path,
+ "subvol=%s", subvol->name, "gfid=%s", gfid, NULL);
+ } else {
+ dict_del(xdata, "sync_backend_xattrs");
}
}
}
@@ -2324,21 +2210,17 @@ dht_dir_heal_xattrs(void *data)
ret = dht_dict_set_array(internal_xattr, conf->mds_xattr_key, allzero,
1);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s",
- conf->mds_xattr_key, local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "key=%s", conf->mds_xattr_key, "path=%s", local->loc.path,
+ NULL);
goto out;
}
ret = syncop_setxattr(mds_subvol, &local->loc, internal_xattr, 0, NULL,
NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "Failed to reset internal xattr "
- "on path %s on subvol %s"
- "gfid = %s ",
- local->loc.path, mds_subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", mds_subvol->name, "gfid=%s", gfid, NULL);
}
}
@@ -2389,8 +2271,8 @@ dht_dir_attr_heal(void *data)
call_cnt = conf->subvolume_cnt;
if (!__is_root_gfid(local->stbuf.ia_gfid) && (!mds_subvol)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_ATTR_HEAL_FAILED,
- "No mds subvol for %s gfid = %s", local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_MDS_SUBVOL, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
@@ -2398,11 +2280,9 @@ dht_dir_attr_heal(void *data)
for (i = 0; i < conf->subvolume_cnt; i++) {
if (conf->subvolumes[i] == mds_subvol) {
if (!conf->subvolume_status[i]) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- DHT_MSG_HASHED_SUBVOL_DOWN,
- "mds subvol is down for path "
- " %s gfid is %s Unable to set xattr ",
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MDS_DOWN_UNABLE_TO_SET, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
}
@@ -2428,10 +2308,9 @@ dht_dir_attr_heal(void *data)
if (ret) {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_ATTR_HEAL_FAILED,
- "Directory attr heal failed. Failed to set"
- " uid/gid on path %s on subvol %s, gfid = %s ",
- local->loc.path, subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_ATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", subvol->name, "gfid=%s", gfid, NULL);
}
}
out:
@@ -2446,7 +2325,7 @@ dht_dir_attr_heal_done(int ret, call_frame_t *sync_frame, void *data)
}
/* EXIT: dht_update_commit_hash_for_layout */
-int
+static int
dht_update_commit_hash_for_layout_done(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
@@ -2466,7 +2345,7 @@ dht_update_commit_hash_for_layout_done(call_frame_t *frame, void *cookie,
return 0;
}
-int
+static int
dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this)
{
dht_local_t *local = NULL;
@@ -2484,11 +2363,8 @@ dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this)
local->op_ret = -1;
}
- gf_msg(this->name, GF_LOG_WARNING, errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Winding unlock failed: stale locks left on brick"
- " %s",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, errno, DHT_MSG_WIND_UNLOCK_FAILED,
+ "path=%s", local->loc.path, NULL);
dht_update_commit_hash_for_layout_done(frame, NULL, this, 0, 0, NULL);
}
@@ -2496,7 +2372,7 @@ dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this)
return 0;
}
-int
+static int
dht_update_commit_hash_for_layout_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
dict_t *xdata)
@@ -2523,7 +2399,7 @@ dht_update_commit_hash_for_layout_cbk(call_frame_t *frame, void *cookie,
return 0;
}
-int
+static int
dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
@@ -2551,11 +2427,8 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (!xattr) {
local->op_errno = errno;
- gf_msg(this->name, GF_LOG_WARNING, errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory commit hash update failed:"
- " %s: Allocation failed",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, errno, DHT_MSG_COMMIT_HASH_FAILED,
+ "allocation-failed path=%s", local->loc.path, NULL);
goto err;
}
@@ -2566,11 +2439,10 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (ret < 0) {
local->op_errno = ENOENT;
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory commit hash update failed:"
- " %s: (subvol %s) Failed to find disk layout",
- local->loc.path, conf->local_subvols[i]->name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMMIT_HASH_FAILED,
+ "path=%s", local->loc.path, "subvol=%s",
+ conf->local_subvols[i]->name, "find-disk-layout-failed",
+ NULL);
goto err;
}
@@ -2584,12 +2456,10 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (ret == -1) {
local->op_errno = errno;
- gf_msg(this->name, GF_LOG_WARNING, errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory commit hash update failed:"
- " %s: (subvol %s) Failed to extract disk"
- " layout",
- local->loc.path, conf->local_subvols[i]->name);
+ gf_smsg(this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_COMMIT_HASH_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", conf->local_subvols[i]->name,
+ "extract-disk-layout-failed", NULL);
goto err;
}
@@ -2598,11 +2468,9 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (!xattr[i]) {
local->op_errno = errno;
- gf_msg(this->name, GF_LOG_WARNING, errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory commit hash update failed:"
- " %s: Allocation failed",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_COMMIT_HASH_FAILED, "path=%s Allocation-failed",
+ local->loc.path, NULL);
goto err;
}
@@ -2611,12 +2479,10 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (ret != 0) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory self heal xattr failed:"
- "%s: (subvol %s) Failed to set xattr"
- " dictionary,",
- local->loc.path, conf->local_subvols[i]->name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "path=%s",
+ local->loc.path, "subvol=%s", conf->local_subvols[i]->name,
+ "set-xattr-failed", NULL);
goto err;
}
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index c7ef2f1190b..bb72b0ffbb5 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -17,24 +17,6 @@
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
-#define GF_DECIDE_DEFRAG_THROTTLE_COUNT(throttle_count, conf) \
- { \
- pthread_mutex_lock(&conf->defrag->dfq_mutex); \
- \
- if (!strcasecmp(conf->dthrottle, "lazy")) \
- conf->defrag->recon_thread_count = 1; \
- \
- throttle_count = MAX((sysconf(_SC_NPROCESSORS_ONLN) - 4), 4); \
- \
- if (!strcasecmp(conf->dthrottle, "normal")) \
- conf->defrag->recon_thread_count = (throttle_count / 2); \
- \
- if (!strcasecmp(conf->dthrottle, "aggressive")) \
- conf->defrag->recon_thread_count = throttle_count; \
- \
- pthread_mutex_unlock(&conf->defrag->dfq_mutex); \
- }
-
/* TODO:
- use volumename in xattr instead of "dht"
- use NS locks
@@ -42,9 +24,7 @@
- complete linkfile selfheal
*/
-extern dht_methods_t dht_methods;
-
-void
+static void
dht_layout_dump(dht_layout_t *layout, const char *prefix)
{
char key[GF_DUMP_MAX_BUF_LEN];
@@ -52,8 +32,6 @@ dht_layout_dump(dht_layout_t *layout, const char *prefix)
if (!layout)
goto out;
- if (!prefix)
- goto out;
gf_proc_dump_build_key(key, prefix, "cnt");
gf_proc_dump_write(key, "%d", layout->cnt);
@@ -73,9 +51,9 @@ dht_layout_dump(dht_layout_t *layout, const char *prefix)
gf_proc_dump_build_key(key, prefix, "list[%d].err", i);
gf_proc_dump_write(key, "%d", layout->list[i].err);
gf_proc_dump_build_key(key, prefix, "list[%d].start", i);
- gf_proc_dump_write(key, "%u", layout->list[i].start);
+ gf_proc_dump_write(key, "0x%x", layout->list[i].start);
gf_proc_dump_build_key(key, prefix, "list[%d].stop", i);
- gf_proc_dump_write(key, "%u", layout->list[i].stop);
+ gf_proc_dump_write(key, "0x%x", layout->list[i].stop);
if (layout->list[i].xlator) {
gf_proc_dump_build_key(key, prefix, "list[%d].xlator.type", i);
gf_proc_dump_write(key, "%s", layout->list[i].xlator->type);
@@ -162,9 +140,9 @@ dht_priv_dump(xlator_t *this)
}
}
- if (conf->last_stat_fetch.tv_sec)
+ if (conf->last_stat_fetch)
gf_proc_dump_write("last_stat_fetch", "%s",
- ctime(&conf->last_stat_fetch.tv_sec));
+ ctime(&conf->last_stat_fetch));
UNLOCK(&conf->subvolume_lock);
@@ -264,7 +242,7 @@ out:
return ret;
}
-int
+static int
dht_parse_decommissioned_bricks(xlator_t *this, dht_conf_t *conf,
const char *bricks)
{
@@ -278,6 +256,10 @@ dht_parse_decommissioned_bricks(xlator_t *this, dht_conf_t *conf,
goto out;
dup_brick = gf_strdup(bricks);
+ if (dup_brick == NULL) {
+ goto out;
+ }
+
node = strtok_r(dup_brick, ",", &tmpstr);
while (node) {
for (i = 0; i < conf->subvolume_cnt; i++) {
@@ -306,14 +288,10 @@ out:
return ret;
}
-int
+static void
dht_decommissioned_remove(xlator_t *this, dht_conf_t *conf)
{
int i = 0;
- int ret = -1;
-
- if (!conf)
- goto out;
for (i = 0; i < conf->subvolume_cnt; i++) {
if (conf->decommissioned_bricks[i]) {
@@ -321,13 +299,9 @@ dht_decommissioned_remove(xlator_t *this, dht_conf_t *conf)
conf->decommission_subvols_cnt--;
}
}
-
- ret = 0;
-out:
-
- return ret;
}
-void
+
+static void
dht_init_regex(xlator_t *this, dict_t *odict, char *name, regex_t *re,
gf_boolean_t *re_valid, dht_conf_t *conf)
{
@@ -384,7 +358,7 @@ out:
return ret;
}
-int
+static int
dht_configure_throttle(xlator_t *this, dht_conf_t *conf, char *temp_str)
{
int rebal_thread_count = 0;
@@ -401,18 +375,20 @@ dht_configure_throttle(xlator_t *this, dht_conf_t *conf, char *temp_str)
} else if ((gf_string2int(temp_str, &rebal_thread_count) == 0)) {
if ((rebal_thread_count > 0) &&
(rebal_thread_count <= MAX_REBAL_THREADS)) {
+ conf->defrag->recon_thread_count = rebal_thread_count;
+ pthread_mutex_unlock(&conf->defrag->dfq_mutex);
gf_msg(this->name, GF_LOG_INFO, 0, 0,
"rebal thread count configured to %d",
rebal_thread_count);
- conf->defrag->recon_thread_count = rebal_thread_count;
+ goto out;
} else {
+ pthread_mutex_unlock(&conf->defrag->dfq_mutex);
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
"Invalid option: Reconfigure: "
"rebal-throttle should be "
"within range of 0 and maximum number of"
" cores available");
ret = -1;
- pthread_mutex_unlock(&conf->defrag->dfq_mutex);
goto out;
}
} else {
@@ -521,9 +497,7 @@ dht_reconfigure(xlator_t *this, dict_t *options)
if (ret == -1)
goto out;
} else {
- ret = dht_decommissioned_remove(this, conf);
- if (ret == -1)
- goto out;
+ dht_decommissioned_remove(this, conf);
}
dht_init_regex(this, options, "rsync-hash-regex", &conf->rsync_regex,
@@ -563,6 +537,8 @@ gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag,
pattern_str = strtok_r(data, ",", &tmp_str);
while (pattern_str) {
dup_str = gf_strdup(pattern_str);
+ if (!dup_str)
+ goto out;
pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1);
if (!pattern_list) {
goto out;
@@ -609,7 +585,7 @@ out:
return ret;
}
-int
+static int
dht_init_methods(xlator_t *this)
{
int ret = -1;
@@ -622,7 +598,6 @@ dht_init_methods(xlator_t *this)
methods = &(conf->methods);
methods->migration_get_dst_subvol = dht_migration_get_dst_subvol;
- methods->migration_needed = dht_migration_needed;
methods->migration_other = NULL;
methods->layout_search = dht_layout_search;
@@ -1071,84 +1046,6 @@ struct volume_options dht_options[] = {
/* NUFA option */
{.key = {"local-volume-name"}, .type = GF_OPTION_TYPE_XLATOR},
- /* tier options */
- {
- .key = {"tier-pause"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
-
- {
- .key = {"tier-promote-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "120",
- },
-
- {
- .key = {"tier-demote-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "3600",
- },
-
- {
- .key = {"write-freq-threshold"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
-
- {
- .key = {"read-freq-threshold"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
- {
- .key = {"watermark-hi"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "90",
- },
- {
- .key = {"watermark-low"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "75",
- },
- {
- .key = {"tier-mode"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "test",
- },
- {
- .key = {"tier-compact"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- {.key = {"tier-hot-compact-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "604800",
- .description = "Frequency to compact DBs on hot tier in system"},
- {.key = {"tier-cold-compact-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "604800",
- .description = "Frequency to compact DBs on cold tier in system"},
- {
- .key = {"tier-max-mb"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "4000",
- },
- {
- .key = {"tier-max-promote-file-size"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
- {
- .key = {"tier-max-files"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "10000",
- },
- {
- .key = {"tier-query-limit"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "100",
- },
/* switch option */
{.key = {"pattern.switch.case"}, .type = GF_OPTION_TYPE_ANY},
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 934eace1ad1..53de8292704 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -8,7 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/statedump.h>
#include "dht-common.h"
struct xlator_fops dht_pt_fops = {
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index 59313639c45..3648a564840 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -595,7 +595,6 @@ nufa_init(xlator_t *this)
dht_methods_t dht_methods = {
.migration_get_dst_subvol = dht_migration_get_dst_subvol,
- .migration_needed = dht_migration_needed,
.layout_search = dht_layout_search,
};
diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c
index a782fcdfbd2..207d109a025 100644
--- a/xlators/cluster/dht/src/switch.c
+++ b/xlators/cluster/dht/src/switch.c
@@ -610,9 +610,15 @@ set_switch_pattern(xlator_t *this, dht_conf_t *conf, const char *pattern_str)
/* Get the pattern for considering switch case.
"option block-size *avi:10MB" etc */
option_string = gf_strdup(pattern_str);
+ if (option_string == NULL) {
+ goto err;
+ }
switch_str = strtok_r(option_string, ";", &tmp_str);
while (switch_str) {
dup_str = gf_strdup(switch_str);
+ if (dup_str == NULL) {
+ goto err;
+ }
switch_opt = GF_CALLOC(1, sizeof(struct switch_struct),
gf_switch_mt_switch_struct);
if (!switch_opt) {
@@ -647,6 +653,9 @@ set_switch_pattern(xlator_t *this, dht_conf_t *conf, const char *pattern_str)
if (childs) {
dup_childs = gf_strdup(childs);
+ if (dup_childs == NULL) {
+ goto err;
+ }
child = strtok_r(dup_childs, ",", &tmp);
while (child) {
if (gf_switch_valid_child(this, child)) {
diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c
index c5af2ab5e39..703a30e2485 100644
--- a/xlators/cluster/ec/src/ec-combine.c
+++ b/xlators/cluster/ec/src/ec-combine.c
@@ -179,13 +179,14 @@ ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src,
"links: %u-%u, uid: %u-%u, gid: %u-%u, "
"rdev: %" PRIu64 "-%" PRIu64 ", size: %" PRIu64 "-%" PRIu64
", "
- "mode: %o-%o)",
+ "mode: %o-%o), %s",
dst[i].ia_ino, src[i].ia_ino, dst[i].ia_nlink,
src[i].ia_nlink, dst[i].ia_uid, src[i].ia_uid, dst[i].ia_gid,
src[i].ia_gid, dst[i].ia_rdev, src[i].ia_rdev,
dst[i].ia_size, src[i].ia_size,
st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type),
- st_mode_from_ia(src[i].ia_prot, dst[i].ia_type));
+ st_mode_from_ia(src[i].ia_prot, dst[i].ia_type),
+ ec_msg_str(fop));
return 0;
}
@@ -342,9 +343,8 @@ out:
}
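+/* 'fmt' is moved to be the last named parameter so that
+ * va_start(args, fmt) is well-defined. */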
static int32_t
-ec_dict_data_concat(const char *fmt, ec_cbk_data_t *cbk, int32_t which,
- char *key, char *new_key, const char *def,
- gf_boolean_t global, ...)
+ec_dict_data_concat(ec_cbk_data_t *cbk, int32_t which, char *key, char *new_key,
+ const char *def, gf_boolean_t global, const char *fmt, ...)
{
ec_t *ec = cbk->fop->xl->private;
data_t *data[ec->nodes];
@@ -356,7 +356,7 @@ ec_dict_data_concat(const char *fmt, ec_cbk_data_t *cbk, int32_t which,
ec_dict_list(data, cbk, which, key, global);
- va_start(args, global);
+ va_start(args, fmt);
err = ec_concat_prepare(cbk->fop->xl, &pre, &sep, &post, fmt, args);
va_end(args);
@@ -485,22 +485,12 @@ ec_dict_data_merge(ec_cbk_data_t *cbk, int32_t which, char *key)
tmp = NULL;
- len = dict_serialized_length(lockinfo);
- if (len < 0) {
- err = len;
-
- goto out;
- }
- ptr = GF_MALLOC(len, gf_common_mt_char);
- if (ptr == NULL) {
- err = -ENOMEM;
-
- goto out;
- }
- err = dict_serialize(lockinfo, ptr);
+ err = dict_allocate_and_serialize(lockinfo, (char **)&ptr,
+ (unsigned int *)&len);
if (err != 0) {
goto out;
}
+
dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
err = dict_set_dynptr(dict, key, ptr, len);
if (err != 0) {
@@ -739,14 +729,14 @@ ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg)
if ((strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) ||
(strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0)) {
- return ec_dict_data_concat("(<EC:%s> { })", data->cbk, data->which, key,
- NULL, NULL, _gf_false,
+ return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL,
+ _gf_false, _gf_false, "(<EC:%s> { })",
data->cbk->fop->xl->name);
}
if (strncmp(key, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) == 0) {
- return ec_dict_data_concat("{\n}", data->cbk, data->which, key, NULL,
- NULL, _gf_false);
+ return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL,
+ _gf_false, "{\n}");
}
if (strncmp(key, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) == 0) {
@@ -776,9 +766,9 @@ ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg)
if (XATTR_IS_NODE_UUID(key)) {
if (data->cbk->fop->int32) {
/* List of node uuid is requested */
- return ec_dict_data_concat("{ }", data->cbk, data->which, key,
+ return ec_dict_data_concat(data->cbk, data->which, key,
GF_XATTR_LIST_NODE_UUIDS_KEY, UUID0_STR,
- _gf_true);
+ _gf_true, "{ }");
} else {
return ec_dict_data_uuid(data->cbk, data->which, key);
}
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index e85aa8bf43f..b955efd8c2d 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -101,6 +101,7 @@ ec_fix_open(ec_fop_data_t *fop, uintptr_t mask)
{
uintptr_t need_open = 0;
int ret = 0;
+ int32_t flags = 0;
loc_t loc = {
0,
};
@@ -121,6 +122,7 @@ ec_fix_open(ec_fop_data_t *fop, uintptr_t mask)
goto out;
}
+ flags = fop->fd->flags & (~(O_TRUNC | O_APPEND | O_CREAT | O_EXCL));
if (IA_IFDIR == fop->fd->inode->ia_type) {
ec_opendir(fop->frame, fop->xl, need_open,
EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL,
@@ -128,7 +130,7 @@ ec_fix_open(ec_fop_data_t *fop, uintptr_t mask)
} else {
ec_open(fop->frame, fop->xl, need_open,
EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, &loc,
- fop->fd->flags & (~O_TRUNC), fop->fd, NULL);
+ flags, fop->fd, NULL);
}
out:
@@ -228,7 +230,7 @@ ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx)
int32_t
ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good,
- uintptr_t bad, dict_t *xdata)
+ uintptr_t bad, uint32_t pending, dict_t *xdata)
{
if (op_ret < 0) {
gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL,
@@ -314,17 +316,19 @@ ec_check_status(ec_fop_data_t *fop)
}
}
- gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
- "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
- "remaining=%s, good=%s, bad=%s, %s)",
- gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
- ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
- ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
- ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
- ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
- ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
- ec->nodes),
- ec_msg_str(fop));
+ gf_msg(
+ fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
+ "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
+ "remaining=%s, good=%s, bad=%s,"
+ "(Least significant bit represents first client/brick of subvol), %s)",
+ gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
+ ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
+ ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
+ ec->nodes),
+ ec_msg_str(fop));
if (fop->use_fd) {
if (fop->fd != NULL) {
ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
@@ -612,10 +616,10 @@ ec_msg_str(ec_fop_data_t *fop)
loc_t *loc2 = NULL;
char gfid1[64] = {0};
char gfid2[64] = {0};
+ ec_fop_data_t *parent = fop->parent;
if (fop->errstr)
return fop->errstr;
-
if (!fop->use_fd) {
loc1 = &fop->loc[0];
loc2 = &fop->loc[1];
@@ -623,23 +627,45 @@ ec_msg_str(ec_fop_data_t *fop)
if (fop->id == GF_FOP_RENAME) {
gf_asprintf(&fop->errstr,
"FOP : '%s' failed on '%s' and '%s' with gfids "
- "%s and %s respectively",
+ "%s and %s respectively. Parent FOP: %s",
ec_fop_name(fop->id), loc1->path, loc2->path,
uuid_utoa_r(loc1->gfid, gfid1),
- uuid_utoa_r(loc2->gfid, gfid2));
+ uuid_utoa_r(loc2->gfid, gfid2),
+ parent ? ec_fop_name(parent->id) : "No Parent");
} else {
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' with gfid %s",
- ec_fop_name(fop->id), loc1->path,
- uuid_utoa_r(loc1->gfid, gfid1));
+ gf_asprintf(
+ &fop->errstr,
+ "FOP : '%s' failed on '%s' with gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), loc1->path,
+ uuid_utoa_r(loc1->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
}
} else {
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s",
- ec_fop_name(fop->id),
- uuid_utoa_r(fop->fd->inode->gfid, gfid1));
+ gf_asprintf(
+ &fop->errstr, "FOP : '%s' failed on gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), uuid_utoa_r(fop->fd->inode->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
}
return fop->errstr;
}
+static void
+ec_log_insufficient_vol(ec_fop_data_t *fop, int32_t have, uint32_t need,
+ int32_t loglevel)
+{
+ ec_t *ec = fop->xl->private;
+ char str1[32], str2[32], str3[32];
+
+ gf_msg(ec->xl->name, loglevel, 0, EC_MSG_CHILDS_INSUFFICIENT,
+ "Insufficient available children for this request: "
+ "Have : %d, Need : %u : Child UP : %s "
+ "Mask: %s, Healing : %s : %s ",
+ have, need, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->healing, ec->nodes),
+ ec_msg_str(fop));
+}
+
static int32_t
ec_child_select(ec_fop_data_t *fop)
{
@@ -654,6 +680,9 @@ ec_child_select(ec_fop_data_t *fop)
* unlock should go on all subvols where lock is performed*/
if (fop->parent && !ec_internal_op(fop)) {
fop->mask &= (fop->parent->mask & ~fop->parent->healing);
+ if (ec_is_data_fop(fop->id)) {
+ fop->healing |= fop->parent->healing;
+ }
}
if ((fop->mask & ~ec->xl_up) != 0) {
@@ -694,14 +723,19 @@ ec_child_select(ec_fop_data_t *fop)
ec_trace("SELECT", fop, "");
if ((num < fop->minimum) && (num < ec->fragments)) {
- gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
- "Insufficient available children "
- "for this request (have %d, need "
- "%d). %s",
- num, fop->minimum, ec_msg_str(fop));
+ ec_log_insufficient_vol(fop, num, fop->minimum, GF_LOG_ERROR);
return 0;
}
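+ /* A top-level fop that will update data or metadata under lock must
+ * also meet the configured quorum-count before being wound. */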
+ if (!fop->parent && fop->lock_count &&
+ (fop->locks[0].update[EC_DATA_TXN] ||
+ fop->locks[0].update[EC_METADATA_TXN])) {
+ if (ec->quorum_count && (num < ec->quorum_count)) {
+ ec_log_insufficient_vol(fop, num, ec->quorum_count, GF_LOG_ERROR);
+ return 0;
+ }
+ }
+
return 1;
}
@@ -1405,27 +1439,28 @@ ec_get_size_version(ec_lock_link_t *link)
!ec_is_data_fop(fop->id))
link->optimistic_changelog = _gf_true;
+ memset(&loc, 0, sizeof(loc));
+
+ LOCK(&lock->loc.inode->lock);
+
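+ /* The inode lock is now taken before the dirty-flag and have_info
+ * checks so that they are atomic; early exits go through 'unlock'. */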
set_dirty = ec_set_dirty_flag(link, ctx, dirty);
/* If ec metadata has already been retrieved, do not try again. */
- if (ctx->have_info && (!set_dirty)) {
+ if (ctx->have_info) {
if (ec_is_data_fop(fop->id)) {
fop->healing |= lock->healing;
}
- return;
+ if (!set_dirty)
+ goto unlock;
}
/* Determine if there's something we need to retrieve for the current
* operation. */
if (!set_dirty && !lock->query && (lock->loc.inode->ia_type != IA_IFREG) &&
(lock->loc.inode->ia_type != IA_INVAL)) {
- return;
+ goto unlock;
}
- memset(&loc, 0, sizeof(loc));
-
- LOCK(&lock->loc.inode->lock);
-
changed_flags = ec_set_xattrop_flags_and_params(lock, link, dirty);
if (link->waiting_flags) {
/* This fop needs to wait until all its flags are cleared which
@@ -1436,6 +1471,7 @@ ec_get_size_version(ec_lock_link_t *link)
GF_ASSERT(!changed_flags);
}
+unlock:
UNLOCK(&lock->loc.inode->lock);
if (!changed_flags)
@@ -1847,6 +1883,10 @@ ec_lock_acquired(ec_lock_link_t *link)
LOCK(&lock->loc.inode->lock);
lock->acquired = _gf_true;
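+ /* A contention notification arrived while the lock was still being
+ * acquired (see ec_lock_release); schedule the release now. */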
+ if (lock->contention) {
+ lock->release = _gf_true;
+ lock->contention = _gf_false;
+ }
ec_lock_update_fd(lock, fop);
ec_lock_wake_shared(lock, &list);
@@ -1872,15 +1912,20 @@ ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
ec_lock_link_t *link = NULL;
ec_lock_t *lock = NULL;
+ link = fop->data;
+ lock = link->lock;
if (op_ret >= 0) {
- link = fop->data;
- lock = link->lock;
lock->mask = lock->good_mask = fop->good;
lock->healing = 0;
ec_lock_acquired(link);
ec_lock(fop->parent);
} else {
+ LOCK(&lock->loc.inode->lock);
+ {
+ lock->contention = _gf_false;
+ }
+ UNLOCK(&lock->loc.inode->lock);
gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_PREOP_LOCK_FAILED,
"Failed to complete preop lock");
}
@@ -2211,7 +2256,7 @@ ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
if (op_ret < 0) {
gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_UNLOCK_FAILED,
- "entry/inode unlocking failed (%s)", ec_fop_name(link->fop->id));
+ "entry/inode unlocking failed :(%s)", ec_msg_str(link->fop));
} else {
ec_trace("UNLOCKED", link->fop, "lock=%p", link->lock);
}
@@ -2248,6 +2293,23 @@ ec_unlock_lock(ec_lock_link_t *link)
}
}
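+/* Bump the inode's bad_version counter; called when a version/size
+ * update fails so that stale copies can be detected later. */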
+void
+ec_inode_bad_inc(inode_t *inode, xlator_t *xl)
+{
+ ec_inode_t *ctx = NULL;
+
+ LOCK(&inode->lock);
+ {
+ ctx = __ec_inode_get(inode, xl);
+ if (ctx == NULL) {
+ goto unlock;
+ }
+ ctx->bad_version++;
+ }
+unlock:
+ UNLOCK(&inode->lock);
+}
+
int32_t
ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xattr,
@@ -2263,6 +2325,12 @@ ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this,
ctx = lock->ctx;
if (op_ret < 0) {
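+ /* The xattrop failed somewhere: mark the inode as having a bad
+ * (unsynchronized) version. */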
+ if (link->lock->fd == NULL) {
+ ec_inode_bad_inc(link->lock->loc.inode, this);
+ } else {
+ ec_inode_bad_inc(link->lock->fd->inode, this);
+ }
+
gf_msg(fop->xl->name, fop_log_level(fop->id, op_errno), op_errno,
EC_MSG_SIZE_VERS_UPDATE_FAIL,
"Failed to update version and size. %s", ec_msg_str(fop));
@@ -2504,6 +2572,13 @@ ec_lock_release(ec_t *ec, inode_t *inode)
gf_msg_debug(ec->xl->name, 0, "Releasing inode %p due to lock contention",
inode);
+ if (!lock->acquired) {
+ /* This happens if some bricks already got the lock while inodelk is
+ * still in progress. Mark the contention; release is set to true once
+ * the lock is acquired. */
+ lock->contention = _gf_true;
+ goto done;
+ }
+
/* The lock is not marked to be released, so the frozen list should be
* empty. */
GF_ASSERT(list_empty(&lock->frozen));
@@ -2955,3 +3030,13 @@ ec_manager(ec_fop_data_t *fop, int32_t error)
__ec_manager(fop, error);
}
+
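+/* Caller must hold ec->lock. True when no pending fops remain and all
+ * async fops have completed. */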
+gf_boolean_t
+__ec_is_last_fop(ec_t *ec)
+{
+ if ((list_empty(&ec->pending_fops)) &&
+ (GF_ATOMIC_GET(ec->async_fop_count) == 0)) {
+ return _gf_true;
+ }
+ return _gf_false;
+}
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index e94834219b9..51493612ac6 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -11,8 +11,7 @@
#ifndef __EC_COMMON_H__
#define __EC_COMMON_H__
-#include <glusterfs/xlator.h>
-
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
#include "ec-data.h"
typedef enum { EC_DATA_TXN, EC_METADATA_TXN } ec_txn_t;
@@ -26,6 +25,30 @@ typedef enum { EC_DATA_TXN, EC_METADATA_TXN } ec_txn_t;
#define EC_FLAG_LOCK_SHARED 0x0001
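+/* Unwind wrapper that enforces quorum-count: when fewer bricks succeeded
+ * than ec->quorum_count, an apparent success is turned into EIO before
+ * the real callback runs. Self-heal daemon fops are exempt. */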
+#define QUORUM_CBK(fn, fop, frame, cookie, this, op_ret, op_errno, params...) \
+ do { \
+ ec_t *__ec = fop->xl->private; \
+ int32_t __op_ret = 0; \
+ int32_t __op_errno = 0; \
+ int32_t __success_count = gf_bits_count(fop->good); \
+ \
+ __op_ret = op_ret; \
+ __op_errno = op_errno; \
+ if (!fop->parent && frame && \
+ (GF_CLIENT_PID_SELF_HEALD != frame->root->pid) && \
+ __ec->quorum_count && (__success_count < __ec->quorum_count) && \
+ op_ret >= 0) { \
+ __op_ret = -1; \
+ __op_errno = EIO; \
+ gf_msg(__ec->xl->name, GF_LOG_ERROR, 0, \
+ EC_MSG_CHILDS_INSUFFICIENT, \
+ "Insufficient available children for this request " \
+ "(have %d, need %d). %s", \
+ __success_count, __ec->quorum_count, ec_msg_str(fop)); \
+ } \
+ fn(frame, cookie, this, __op_ret, __op_errno, params); \
+ } while (0)
+
enum _ec_xattrop_flags {
EC_FLAG_XATTROP,
EC_FLAG_DATA_DIRTY,
@@ -204,4 +227,8 @@ void
ec_reset_entry_healing(ec_fop_data_t *fop);
char *
ec_msg_str(ec_fop_data_t *fop);
+gf_boolean_t
+__ec_is_last_fop(ec_t *ec);
+void
+ec_lock_update_good(ec_lock_t *lock, ec_fop_data_t *fop);
#endif /* __EC_COMMON_H__ */
diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c
index 6ef934080a9..06388833546 100644
--- a/xlators/cluster/ec/src/ec-data.c
+++ b/xlators/cluster/ec/src/ec-data.c
@@ -8,7 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include "ec-mem-types.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-data.h"
@@ -202,11 +201,13 @@ ec_handle_last_pending_fop_completion(ec_fop_data_t *fop, gf_boolean_t *notify)
{
ec_t *ec = fop->xl->private;
+ *notify = _gf_false;
+
if (!list_empty(&fop->pending_list)) {
LOCK(&ec->lock);
{
list_del_init(&fop->pending_list);
- *notify = list_empty(&ec->pending_fops);
+ *notify = __ec_is_last_fop(ec);
}
UNLOCK(&ec->lock);
}
diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
index 8310d4a1a8c..f71dcfac293 100644
--- a/xlators/cluster/ec/src/ec-dir-read.c
+++ b/xlators/cluster/ec/src/ec-dir-read.c
@@ -8,15 +8,11 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/xlator.h>
-#include <glusterfs/defaults.h>
-
#include "ec.h"
#include "ec-messages.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
-#include "ec-method.h"
#include "ec-fops.h"
/****************************************************************
@@ -127,13 +123,15 @@ ec_manager_opendir(ec_fop_data_t *fop, int32_t state)
return EC_STATE_REPORT;
}
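+ /* ctx->loc survives across open retries; build it only the first
+ * time so an already-initialized loc is not overwritten. */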
- err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
- if (err != 0) {
- UNLOCK(&fop->fd->lock);
+ if (!ctx->loc.inode) {
+ err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
+ if (err != 0) {
+ UNLOCK(&fop->fd->lock);
- fop->error = -err;
+ fop->error = -err;
- return EC_STATE_REPORT;
+ return EC_STATE_REPORT;
+ }
}
UNLOCK(&fop->fd->lock);
@@ -388,9 +386,16 @@ ec_manager_readdir(ec_fop_data_t *fop, int32_t state)
/* Return error if opendir has not been successfully called on
* any subvolume. */
ctx = ec_fd_get(fop->fd, fop->xl);
- if ((ctx == NULL) || (ctx->open == 0)) {
- fop->error = EINVAL;
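+ /* Distinguish the failures: no fd context means an allocation
+ * problem (ENOMEM), while an fd that was never opened on any
+ * subvolume is a bad descriptor (EBADFD). */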
+ if (ctx == NULL) {
+ fop->error = ENOMEM;
+ } else if (ctx->open == 0) {
+ fop->error = EBADFD;
+ }
+ if (fop->error) {
+ gf_msg(fop->xl->name, GF_LOG_ERROR, fop->error,
+ EC_MSG_INVALID_REQUEST, "EC is not winding readdir: %s",
+ ec_msg_str(fop));
return EC_STATE_REPORT;
}
diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c
index 0b8ee219f87..53d27d895c3 100644
--- a/xlators/cluster/ec/src/ec-dir-write.c
+++ b/xlators/cluster/ec/src/ec-dir-write.c
@@ -8,9 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/xlator.h>
-#include <glusterfs/defaults.h>
-
#include "ec.h"
#include "ec-messages.h"
#include "ec-helpers.h"
@@ -218,10 +215,10 @@ ec_manager_create(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NU