[Gluster-devel] ESTALE / "Stale NFS file handle"

Amar Tumballi amar at zresearch.com
Tue Mar 10 10:13:26 UTC 2009


Hi Dan,
 These problems popped up when we fixed some bugs in our hashing
algorithm (it was fine in the 1.4.0preX releases, and the fix went in from
rc2 onwards). This behavior can hit anyone who used dht in rc1 and has
since moved to a later rcX release.
 You will notice that the affected filename lengths are multiples of 16
(32 in this case). To fix this we have a couple of approaches for now. One
is to have a separate '/mnt/debug' mountpoint with 'option lookup-unhashed
yes' set in the dht volume, and then stat, over that debug mountpoint, the
files whose filename length is 16 or a multiple of 16. This should also fix
the missing-file problem on the main mountpoint, as the lookup creates a
proper linkfile on the proper hashed volume.
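To make that concrete, here is a rough sketch (the volfile path, the mount
command and the shell one-liner below are my assumptions; adapt them to your
setup). Your client config below already carries the option commented out in
dht0, so a copy of that volfile with the comment removed should be enough for
the debug mount:

volume dht0
   type cluster/dht
   # look up on all subvolumes, not only the hashed one
   option lookup-unhashed yes
   subvolumes distfs01-unify distfs02-unify distfs03-unify distfs04-unify
end-volume

Keep the rest of the graph unchanged and mount the copy on a separate
mountpoint, for example:

   glusterfs -f /etc/glusterfs/client-debug.vol /mnt/debug

Then stat the files whose basename length is a multiple of 16 over that
mountpoint, along the lines of (only a sketch; it does not cope with
newlines in filenames):

   find /mnt/debug -type f | awk -F/ 'length($NF) % 16 == 0' | \
       while read -r f; do stat "$f" > /dev/null; done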

Sorry for the inconvenience.

Regards,
Amar


On Mon, Mar 9, 2009 at 10:40 PM, Dan Parsons <dparsons at nyip.net> wrote:

> I'm getting the error messages below in rc4. As in my previous email, there
> doesn't seem to be any pattern to which server/client it happens on, though
> the errors occur fairly frequently.
> 2009-03-09 17:32:26 E [unify.c:585:unify_lookup] unify: returning ESTALE for /bio/data/fast-hmmsearch-all/tmpP986E__fast-hmmsearch-all_job/result.tigrfam.TIGR02622.hmmhits: file count is 1
> 2009-03-09 17:32:26 E [unify.c:591:unify_lookup] unify: /bio/data/fast-hmmsearch-all/tmpP986E__fast-hmmsearch-all_job/result.tigrfam.TIGR02622.hmmhits: found on unify-switch-ns
> 2009-03-09 17:32:26 W [fuse-bridge.c:301:need_fresh_lookup] fuse-bridge: revalidate of /bio/data/fast-hmmsearch-all/tmpP986E__fast-hmmsearch-all_job/result.tigrfam.TIGR02622.hmmhits failed (Stale NFS file handle)
>
> 2009-03-09 17:32:28 E [unify.c:360:unify_lookup_cbk] unify: child(dht0): path(/bio/data/fast-hmmsearch-all/tmpP986E__fast-hmmsearch-all_job/result.tigrfam.TIGR01420.hmmhits): No such file or directory
> 2009-03-09 17:32:28 E [unify.c:360:unify_lookup_cbk] unify: child(unify-switch-ns): path(/bio/data/fast-hmmsearch-all/tmpP986E__fast-hmmsearch-all_job/result.tigrfam.TIGR01420.hmmhits): No such file or directory
>
> As you can see, there are two separate sets of errors for two different
> files, and both are troubling. This problem has persisted from rc2 to rc4,
> though I can't say for certain that it was introduced in rc2 (I think it was
> there prior to that as well). There are no matching errors in the server
> logs.
>
> Any suggestions? My configs are below. Thanks!
>
> CLIENT CONFIG:
>
> volume unify-switch-ns
>    type protocol/client
>    option transport-type tcp
>    option remote-host 10.8.101.51
>    option remote-subvolume posix-unify-switch-ns
> end-volume
>
> #volume distfs01-ns-readahead
> #   type performance/read-ahead
> #   option page-size 1MB
> #   option page-count 8
> #   subvolumes distfs01-ns-brick
> #end-volume
>
> #volume unify-switch-ns
> #   type performance/write-behind
> #   option block-size 1MB
> #   option cache-size 3MB
> #   subvolumes distfs01-ns-readahead
> #end-volume
>
> volume distfs01-unify
>    type protocol/client
>    option transport-type tcp
>    option remote-host 10.8.101.51
>    option remote-subvolume posix-unify
> end-volume
>
> volume distfs02-unify
>    type protocol/client
>    option transport-type tcp
>    option remote-host 10.8.101.52
>    option remote-subvolume posix-unify
> end-volume
>
> volume distfs03-unify
>    type protocol/client
>    option transport-type tcp
>    option remote-host 10.8.101.53
>    option remote-subvolume posix-unify
> end-volume
>
> volume distfs04-unify
>    type protocol/client
>    option transport-type tcp
>    option remote-host 10.8.101.54
>    option remote-subvolume posix-unify
> end-volume
>
> volume distfs01-stripe
>    type protocol/client
>    option transport-type tcp
>    option remote-host 10.8.101.51
>    option remote-subvolume posix-stripe
> end-volume
>
> volume distfs02-stripe
>    type protocol/client
>    option transport-type tcp
>    option remote-host 10.8.101.52
>    option remote-subvolume posix-stripe
> end-volume
>
> volume distfs03-stripe
>    type protocol/client
>    option transport-type tcp
>    option remote-host 10.8.101.53
>    option remote-subvolume posix-stripe
> end-volume
>
> volume distfs04-stripe
>    type protocol/client
>    option transport-type tcp
>    option remote-host 10.8.101.54
>    option remote-subvolume posix-stripe
> end-volume
>
> volume stripe0
>    type cluster/stripe
>    option block-size *.jar,*.pin:1MB,*:2MB
>    subvolumes distfs01-stripe distfs02-stripe distfs03-stripe distfs04-stripe
> end-volume
>
> volume dht0
>    type cluster/dht
>    # option lookup-unhashed yes
>    subvolumes distfs01-unify distfs02-unify distfs03-unify distfs04-unify
> end-volume
>
> volume unify
>    type cluster/unify
>    option namespace unify-switch-ns
>    option self-heal off
>    option scheduler switch
>    # send *.phr/psq/pnd etc to stripe0, send the rest to hash
>    # extensions have to be *.foo* and not simply *.foo, or rsync's tmp file
>    # naming will prevent files from being matched
>    option scheduler.switch.case *.phr*:stripe0;*.psq*:stripe0;*.pnd*:stripe0;*.psd*:stripe0;*.pin*:stripe0;*.nsi*:stripe0;*.nin*:stripe0;*.nsd*:stripe0;*.nhr*:stripe0;*.nsq*:stripe0;*.tar*:stripe0;*.tar.gz*:stripe0;*.jar*:stripe0;*.img*:stripe0;*.perf*:stripe0;*.tgz*:stripe0;*.fasta*:stripe0;*.huge*:stripe0
>    subvolumes stripe0 dht0
> end-volume
>
> volume ioc
>    type performance/io-cache
>    subvolumes unify
>    option cache-size 3000MB
>    option cache-timeout 3600
> end-volume
>
> volume filter
>    type features/filter
>    option fixed-uid 0
>    option fixed-gid 900
>    subvolumes ioc
> end-volume
>
>
>
>
> SERVER CONFIG:
> volume posix-unify-brick
>    type storage/posix
>    option directory /distfs-storage-space/glusterfs/unify
>    # the line below is here to make the output of 'df' accurate, as both
>    # volumes are served from the same local drive
>    option export-statfs-size off
> end-volume
>
> volume posix-stripe-brick
>         type storage/posix
>         option directory /distfs-storage-space/glusterfs/stripe
> end-volume
>
> volume posix-unify-switch-ns-brick
>         type storage/posix
>         option directory /distfs-storage-space/glusterfs/unify-switch-ns
> end-volume
>
> volume posix-unify
>    type performance/io-threads
>    option thread-count 4
>    subvolumes posix-unify-brick
> end-volume
>
> volume posix-stripe
>    type performance/io-threads
>    option thread-count 4
>    subvolumes posix-stripe-brick
> end-volume
>
> volume posix-unify-switch-ns
>    type performance/io-threads
>    option thread-count 2
>    subvolumes posix-unify-switch-ns-brick
> end-volume
>
> volume server
>    type protocol/server
>    option transport-type tcp
>    option auth.addr.posix-unify.allow 10.8.101.*,10.8.15.50
>    option auth.addr.posix-stripe.allow 10.8.101.*,10.8.15.50
>    option auth.addr.posix-unify-switch-ns.allow 10.8.101.*,10.8.15.50
>    subvolumes posix-unify posix-stripe posix-unify-switch-ns
> end-volume
>
>


-- 
Amar Tumballi