[Gluster-devel] ping timer expired! bailing transport

Dan Parsons dparsons at nyip.net
Tue Mar 10 05:34:17 UTC 2009


I just received this error message using rc4:
2009-03-09 21:58:16 E [client-protocol.c:505:client_ping_timer_expired]
distfs03-stripe: ping timer expired! bailing transport
2009-03-09 21:58:16 N [client-protocol.c:6607:notify] distfs03-stripe:
disconnected

It happened a total of 7 times across my 33 client nodes. It doesn't seem to
be related to any particular client, but the errors did happen mostly
(though not always) on the unify-ns server. The gluster servers are under
pretty heavy network utilization, however it doesn't seem to be near the
link capacity and in any case, i/o should just block if it's slow to
respond, correct? Fortunately, gluster is automatically reconnecting after
the error. I don't remember seeing this in rc2. The only corresponding
errors in the server logs are simply showing the client disconnecting. I've
also ruled out any interconnect faults.

Any suggestions? My configs are below.

Dan

CLIENT CONFIG:

volume unify-switch-ns
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.51
   option remote-subvolume posix-unify-switch-ns
end-volume

#volume distfs01-ns-readahead
#   type performance/read-ahead
#   option page-size 1MB
#   option page-count 8
#   subvolumes distfs01-ns-brick
#end-volume

#volume unify-switch-ns
#   type performance/write-behind
#   option block-size 1MB
#   option cache-size 3MB
#   subvolumes distfs01-ns-readahead
#end-volume

volume distfs01-unify
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.51
   option remote-subvolume posix-unify
end-volume

volume distfs02-unify
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.52
   option remote-subvolume posix-unify
end-volume

volume distfs03-unify
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.53
   option remote-subvolume posix-unify
end-volume

volume distfs04-unify
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.54
   option remote-subvolume posix-unify
end-volume

volume distfs01-stripe
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.51
   option remote-subvolume posix-stripe
end-volume

volume distfs02-stripe
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.52
   option remote-subvolume posix-stripe
end-volume

volume distfs03-stripe
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.53
   option remote-subvolume posix-stripe
end-volume

volume distfs04-stripe
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.54
   option remote-subvolume posix-stripe
end-volume

volume stripe0
type cluster/stripe
option block-size *.jar,*.pin:1MB,*:2MB
subvolumes distfs01-stripe distfs02-stripe distfs03-stripe distfs04-stripe
end-volume

volume dht0
type cluster/dht
# option lookup-unhashed yes
subvolumes distfs01-unify distfs02-unify distfs03-unify distfs04-unify
end-volume

volume unify
type cluster/unify
option namespace unify-switch-ns
option self-heal off
option scheduler switch
# send *.phr/psq/pnd etc to stripe0, send the rest to hash
# extensions have to be *.foo* and not simply *.foo or rsync's tmp file
# naming will prevent files from being matched
option scheduler.switch.case *.phr*:stripe0;*.psq*:stripe0;*.pnd*:stripe0;*.psd*:stripe0;*.pin*:stripe0;*.nsi*:stripe0;*.nin*:stripe0;*.nsd*:stripe0;*.nhr*:stripe0;*.nsq*:stripe0;*.tar*:stripe0;*.tar.gz*:stripe0;*.jar*:stripe0;*.img*:stripe0;*.perf*:stripe0;*.tgz*:stripe0;*.fasta*:stripe0;*.huge*:stripe0
subvolumes stripe0 dht0
end-volume

volume ioc
   type performance/io-cache
   subvolumes unify
   option cache-size 3000MB
option cache-timeout 3600
end-volume

volume filter
  type features/filter
  option fixed-uid 0
  option fixed-gid 900
  subvolumes ioc
end-volume




SERVER CONFIG:
volume posix-unify-brick
type storage/posix
option directory /distfs-storage-space/glusterfs/unify
# the below line is here to make the output of 'df' accurate, as both
# volumes are served from the same local drive
option export-statfs-size off
end-volume

volume posix-stripe-brick
        type storage/posix
        option directory /distfs-storage-space/glusterfs/stripe
end-volume

volume posix-unify-switch-ns-brick
        type storage/posix
        option directory /distfs-storage-space/glusterfs/unify-switch-ns
end-volume

volume posix-unify
type performance/io-threads
option thread-count 4
subvolumes posix-unify-brick
end-volume

volume posix-stripe
type performance/io-threads
option thread-count 4
subvolumes posix-stripe-brick
end-volume

volume posix-unify-switch-ns
type performance/io-threads
option thread-count 2
subvolumes posix-unify-switch-ns-brick
end-volume

volume server
type protocol/server
option transport-type tcp
option auth.addr.posix-unify.allow 10.8.101.*,10.8.15.50
option auth.addr.posix-stripe.allow 10.8.101.*,10.8.15.50
option auth.addr.posix-unify-switch-ns.allow 10.8.101.*,10.8.15.50
subvolumes posix-unify posix-stripe posix-unify-switch-ns
end-volume
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://supercolony.gluster.org/pipermail/gluster-devel/attachments/20090309/96c5d169/attachment-0003.html>


More information about the Gluster-devel mailing list