[Gluster-users] gluster(1.3.10) becomes unstable after some time

Roman Hlynovskiy roman.hlynovskiy at gmail.com
Mon Sep 8 08:45:26 UTC 2008


hello all,

i have a setup of 4 identical servers. each of them exports 2 data
bricks and 1 namespace brick.
the first brick of each server is AFR'ed with the second brick of the
previous server, so this configuration gives some service redundancy in
case one of the servers fails.
all the namespace bricks are also AFR'ed into one.
below you can find my configuration from the first server. as can
be seen, for the client configuration I used the local bricks of this
server (brick1, brick2, brickns) instead of the network-exported ones
from this server (brick01, brick02, brick01ns) to improve read i/o.
likewise, the second server uses brick1, brick2, brickns instead of
brick03, brick04, brick02ns, etc.

The first problem I saw: after 20 minutes of some basic file-copying
tests, the gluster mount on all servers became unavailable.

I see the following errors in the log:
2008-09-08 14:26:36 W [client-protocol.c:205:call_bail] brick03ns:
activating bail-out. pending frames = 1. last sent = 2008-09-08
14:19:43. last received = 2008-09-08 14:19:43 transport-timeout = 42
2008-09-08 14:26:36 C [client-protocol.c:212:call_bail] brick03ns:
bailing transport
2008-09-08 14:26:36 E [tcp.c:124:tcp_except] brick03ns: shutdown () -
error: Transport endpoint is not connected
2008-09-08 14:26:36 W [client-protocol.c:205:call_bail] brick05:
activating bail-out. pending frames = 1. last sent = 2008-09-08
14:19:43. last received = 2008-09-08 14:19:43 transport-timeout = 42
2008-09-08 14:26:36 C [client-protocol.c:212:call_bail] brick05:
bailing transport
2008-09-08 14:26:36 E [tcp.c:124:tcp_except] brick05: shutdown () -
error: Transport endpoint is not connected
2008-09-08 14:26:36 W [client-protocol.c:205:call_bail] brick06:
activating bail-out. pending frames = 1. last sent = 2008-09-08
14:19:43. last received = 2008-09-08 14:19:43 transport-timeout = 42
2008-09-08 14:26:36 C [client-protocol.c:212:call_bail] brick06:
bailing transport
2008-09-08 14:26:36 E [tcp.c:124:tcp_except] brick06: shutdown () -
error: Transport endpoint is not connected
2008-09-08 14:26:41 W [client-protocol.c:205:call_bail] brick08:
activating bail-out. pending frames = 1. last sent = 2008-09-08
14:19:43. last received = 2008-09-08 14:19:43 transport-timeout = 42
2008-09-08 14:26:41 C [client-protocol.c:212:call_bail] brick08:
bailing transport
2008-09-08 14:26:41 E [tcp.c:124:tcp_except] brick08: shutdown () -
error: Transport endpoint is not connected
2008-09-08 14:26:41 W [client-protocol.c:205:call_bail] brick04ns:
activating bail-out. pending frames = 1. last sent = 2008-09-08
14:19:43. last received = 2008-09-08 14:19:43 transport-timeout = 42
2008-09-08 14:26:41 C [client-protocol.c:212:call_bail] brick04ns:
bailing transport
2008-09-08 14:26:41 E [tcp.c:124:tcp_except] brick04ns: shutdown () -
error: Transport endpoint is not connected
2008-09-08 14:26:41 W [client-protocol.c:205:call_bail] brick07:
activating bail-out. pending frames = 1. last sent = 2008-09-08
14:19:43. last received = 2008-09-08 14:19:43 transport-timeout = 42
2008-09-08 14:26:41 C [client-protocol.c:212:call_bail] brick07:
bailing transport
2008-09-08 14:26:41 E [tcp.c:124:tcp_except] brick07: shutdown () -
error: Transport endpoint is not connected

The second problem I see: even with 'option
alu.read-only-subvolumes' set, gluster keeps writing to the volumes
specified as read-only. what could be the reason for this?

----------------------
volume posix1
        type storage/posix
        option directory /mnt/os1/export
end-volume

volume locks1
        type features/posix-locks
        subvolumes posix1
        option mandatory on
end-volume

volume brick1
        type performance/io-threads
        option thread-count 4
        option cache-size 32MB
        subvolumes locks1
end-volume


volume posix2
        type storage/posix
        option directory /mnt/os2/export
end-volume

volume locks2
        type features/posix-locks
        subvolumes posix2
        option mandatory on
end-volume

volume brick2
        type performance/io-threads
        option thread-count 4
        option cache-size 32MB
        subvolumes locks2
end-volume


volume brickns
        type storage/posix
        option directory /mnt/ms
end-volume


volume server
        type protocol/server
        subvolumes brick1 brick2 brickns
        option transport-type tcp/server
        option auth.ip.brick1.allow *
        option auth.ip.brick2.allow *
        option auth.ip.brickns.allow *
end-volume



volume brick01
 type protocol/client
 option transport-type tcp/client
 option remote-host 192.168.252.11
 option remote-subvolume brick1
end-volume


volume brick02
 type protocol/client
 option transport-type tcp/client
 option remote-host 192.168.252.11
 option remote-subvolume brick2
end-volume


volume brick01ns
 type protocol/client
 option transport-type tcp/client
 option remote-host 192.168.252.11
 option remote-subvolume brickns
end-volume


volume brick03
 type protocol/client
 option transport-type tcp/client
 option remote-host 192.168.252.21
 option remote-subvolume brick1
end-volume


volume brick04
 type protocol/client
 option transport-type tcp/client
 option remote-host 192.168.252.21
 option remote-subvolume brick2
end-volume


volume brick02ns
 type protocol/client
 option transport-type tcp/client
 option remote-host 192.168.252.21
 option remote-subvolume brickns
end-volume


volume brick05
 type protocol/client
 option transport-type tcp/client
 option remote-host 192.168.252.31
 option remote-subvolume brick1
end-volume


volume brick06
 type protocol/client
 option transport-type tcp/client
 option remote-host 192.168.252.31
 option remote-subvolume brick2
end-volume


volume brick03ns
 type protocol/client
 option transport-type tcp/client
 option remote-host 192.168.252.31
 option remote-subvolume brickns
end-volume


volume brick07
 type protocol/client
 option transport-type tcp/client
 option remote-host 192.168.252.41
 option remote-subvolume brick1
end-volume


volume brick08
 type protocol/client
 option transport-type tcp/client
 option remote-host 192.168.252.41
 option remote-subvolume brick2
end-volume


volume brick04ns
 type protocol/client
 option transport-type tcp/client
 option remote-host 192.168.252.41
 option remote-subvolume brickns
end-volume


volume afr01
 type cluster/afr
 subvolumes brick2 brick03
 option read-subvolume brick2
end-volume

volume afr02
 type cluster/afr
 subvolumes brick04 brick05
end-volume

volume afr03
 type cluster/afr
 subvolumes brick06 brick07
end-volume

volume afr04
 type cluster/afr
 subvolumes brick08 brick1
 option read-subvolume brick1
end-volume

volume afrns
 type cluster/afr
 subvolumes brickns brick02ns brick03ns brick04ns
 option read-subvolume brickns
end-volume


volume unify
 type cluster/unify
 subvolumes afr01 afr02 afr03 afr04
 option namespace afrns
 option scheduler alu
 option alu.read-only-subvolumes afr02,afr03
 option alu.limits.min-free-disk  5%
 option alu.stat-refresh.interval 10sec
 option alu.order disk-usage:read-usage:write-usage:open-files-usage:disk-speed-usage
 option alu.disk-usage.entry-threshold 1024M
 option alu.disk-usage.exit-threshold 32M
end-volume
---------------------


-- 
...WBR, Roman Hlynovskiy




More information about the Gluster-users mailing list