[Gluster-devel] gluster hang - 1.3.0-pre3

Christopher Hawkins chawkins at bplinux.com
Wed May 23 04:21:23 UTC 2007


Hello, 

Thought I'd submit this for your review... I'll try to repeat it tomorrow. I
was rsyncing a large directory tree (about 2 GB) when the process just hung.
Had to kill it from another terminal... All glusterfs functions died on
servers and clients. The errors in the log on the client (they appear to be
the same on the server) repeat over and over for several hundred lines:

[May 22 23:43:40] [CRITICAL/client-protocol.c:218/call_bail()]
client/protocol:bailing transport
[May 22 23:43:40] [ERROR/common-utils.c:55/full_rw()] libglusterfs:full_rw:
0 bytes r/w instead of 113 (errno=115)
[May 22 23:43:40] [CRITICAL/tcp.c:82/tcp_disconnect()] transport/tcp:closing
socket: 8 priv->connected = 1
[May 22 23:43:40] [ERROR/client-protocol.c:204/client_protocol_xfer()]
protocol/client:transport_submit failed
[May 22 23:43:40] [CRITICAL/client-protocol.c:218/call_bail()]
client/protocol:bailing transport
[May 22 23:43:40] [CRITICAL/tcp.c:82/tcp_disconnect()] transport/tcp:closing
socket: 7 priv->connected = 1
[May 22 23:43:40] [ERROR/client-protocol.c:204/client_protocol_xfer()]
protocol/client:transport_submit failed
[May 22 23:43:41] [CRITICAL/client-protocol.c:218/call_bail()]
client/protocol:bailing transport
[May 22 23:43:41] [CRITICAL/client-protocol.c:218/call_bail()]
client/protocol:bailing transport
[May 22 23:43:41] [CRITICAL/client-protocol.c:218/call_bail()]
client/protocol:bailing transport
[May 22 23:43:41] [CRITICAL/tcp.c:82/tcp_disconnect()] transport/tcp:closing
socket: 7 priv->connected = 1
[May 22 23:43:41] [CRITICAL/tcp.c:82/tcp_disconnect()] transport/tcp:closing
socket: 4 priv->connected = 1
[May 22 23:43:41] [ERROR/client-protocol.c:204/client_protocol_xfer()]
protocol/client:transport_submit failed
[May 22 23:43:41] [ERROR/common-utils.c:110/full_rwv()]
libglusterfs:full_rwv: 0 bytes r/w instead of 302 (Broken pipe)
[May 22 23:43:41] [ERROR/client-protocol.c:204/client_protocol_xfer()]
protocol/client:transport_submit failed
[May 22 23:43:41] [ERROR/tcp.c:111/tcp_except()] transport/tcp:shutdown () -
error: Transport endpoint is not connected
[May 22 23:43:41] [CRITICAL/tcp.c:82/tcp_disconnect()] transport/tcp:closing
socket: 9 priv->connected = 1
[May 22 23:43:41] [ERROR/client-protocol.c:204/client_protocol_xfer()]
protocol/client:transport_submit failed
[May 22 23:43:41] [ERROR/client-protocol.c:204/client_protocol_xfer()]
protocol/client:transport_submit failed
[May 22 23:43:41] [CRITICAL/client-protocol.c:218/call_bail()]
client/protocol:bailing transport
[May 22 23:43:41] [ERROR/common-utils.c:55/full_rw()] libglusterfs:full_rw:
0 bytes r/w instead of 113 (errno=115)
[May 22 23:43:41] [CRITICAL/tcp.c:82/tcp_disconnect()] transport/tcp:closing
socket: 8 priv->connected = 1
[May 22 23:43:41] [ERROR/client-protocol.c:204/client_protocol_xfer()]
protocol/client:transport_submit failed
[May 22 23:43:41] [CRITICAL/client-protocol.c:218/call_bail()]
client/protocol:bailing transport
[May 22 23:43:41] [CRITICAL/client-protocol.c:218/call_bail()]
client/protocol:bailing transport
[May 22 23:43:41] [CRITICAL/client-protocol.c:218/call_bail()]
client/protocol:bailing transport
[May 22 23:43:41] [ERROR/common-utils.c:110/full_rwv()]
libglusterfs:full_rwv: 0 bytes r/w instead of 299 (Broken pipe)
[May 22 23:43:41] [ERROR/client-protocol.c:204/client_protocol_xfer()]
protocol/client:transport_submit failed
[May 22 23:43:41] [ERROR/tcp.c:111/tcp_except()] transport/tcp:shutdown () -
error: Transport endpoint is not connected
[May 22 23:43:41] [ERROR/common-utils.c:55/full_rw()] libglusterfs:full_rw:
0 bytes r/w instead of 113 (errno=107)
[May 22 23:43:41] [CRITICAL/tcp.c:82/tcp_disconnect()] transport/tcp:closing
socket: 4 priv->connected = 1
[May 22 23:43:41] [CRITICAL/tcp.c:82/tcp_disconnect()] transport/tcp:closing
socket: 8 priv->connected = 1
[May 22 23:43:41] [ERROR/common-utils.c:55/full_rw()] libglusterfs:full_rw:
0 bytes r/w instead of 113 (errno=104)
[May 22 23:43:41] [ERROR/common-utils.c:110/full_rwv()]
libglusterfs:full_rwv: 0 bytes r/w instead of 284 (Broken pipe)
[May 22 23:43:41] [ERROR/client-protocol.c:204/client_protocol_xfer()]
protocol/client:transport_submit failed
[May 22 23:43:41] [ERROR/tcp.c:111/tcp_except()] transport/tcp:shutdown () -
error: Transport endpoint is not connected
[May 22 23:43:41] [CRITICAL/tcp.c:82/tcp_disconnect()] transport/tcp:closing
socket: 7 priv->connected = 1
[May 22 23:43:41] [ERROR/client-protocol.c:204/client_protocol_xfer()]
protocol/client:transport_submit failed
[May 22 23:43:41] [ERROR/client-protocol.c:204/client_protocol_xfer()]
protocol/client:transport_submit failed
[May 22 23:43:41] [ERROR/client-protocol.c:204/client_protocol_xfer()]
protocol/client:transport_submit failed
[May 22 23:43:41] [CRITICAL/client-protocol.c:218/call_bail()]
client/protocol:bailing transport
[May 22 23:43:41] [CRITICAL/client-protocol.c:218/call_bail()]
client/protocol:bailing transport
[May 22 23:43:41] [CRITICAL/client-protocol.c:218/call_bail()]
client/protocol:bailing transport
[May 22 23:43:41] [CRITICAL/tcp.c:82/tcp_disconnect()] transport/tcp:closing
socket: 8 priv->connected = 1

My server config file is very basic, and my client config is this:
volume client1
  type protocol/client
  option transport-type tcp/client     # for TCP/IP transport
  option remote-host 192.168.1.20      # IP address of the remote brick
  option remote-subvolume storage1        # name of the remote volume
end-volume

volume client2
  type protocol/client
  option transport-type tcp/client     # for TCP/IP transport
  option remote-host 192.168.1.21      # IP address of the remote brick
  option remote-subvolume storage2        # name of the remote volume
end-volume

volume client3
  type protocol/client
  option transport-type tcp/client     # for TCP/IP transport
  option remote-host 192.168.1.22      # IP address of the remote brick
  option remote-subvolume storage3        # name of the remote volume
end-volume

volume cluster-pool
  type cluster/unify
  subvolumes client1 client2 client3
  option scheduler alu
  option alu.limits.min-free-disk  5GB   # Stop creating files when free-space lt 5GB
  option alu.limits.max-open-files 10000
  option alu.order disk-usage:read-usage:write-usage:open-files-usage:disk-speed-usage
  option alu.disk-usage.entry-threshold 2GB          # Units in KB, MB and GB are allowed
  option alu.disk-usage.exit-threshold  60MB         # Units in KB, MB and GB are allowed
  option alu.open-files-usage.entry-threshold 1024
  option alu.open-files-usage.exit-threshold 32
  option alu.stat-refresh.interval 10sec
end-volume

volume writeback
  type performance/write-behind
  option aggregate-size 131072 # unit in bytes
  subvolumes cluster-pool
end-volume

### Add readahead feature
volume readahead
  type performance/read-ahead
  option page-size 65536     # unit in bytes
  option page-count 16       # cache per file  = (page-count x page-size)
  subvolumes writeback
end-volume

### Add stat-prefetch feature
### If you are not concerned about performance of interactive commands
### like "ls -l", you wouldn't need this translator.
 volume statprefetch
   type performance/stat-prefetch
   option cache-seconds 2   # timeout for stat cache
   subvolumes readahead
 end-volume






More information about the Gluster-devel mailing list