[Gluster-devel] brick crash/hang with io-threads in 2.5 patch 240
Amar S. Tumballi
amar at zresearch.com
Fri Jun 29 17:34:24 UTC 2007
Hi Harris,
Committed the fix for this bug in patch-244.
-bulde
On 6/29/07, Harris Landgarten <harrisl at lhjonline.com> wrote:
>
> Server spec for brick2 and namespace:
>
> ### Export volume "brick" with the contents of "/mnt/export" directory.
> volume posix2
> type storage/posix # POSIX FS translator
> option directory /mnt/export # Export this directory
> end-volume
>
> volume io-threads
> type performance/io-threads
> option thread-count 8
> subvolumes posix2
> end-volume
>
> ### Add POSIX record locking support to the storage brick
> volume brick
> type features/posix-locks
> option mandatory on # enables mandatory locking on all files
> subvolumes io-threads
> end-volume
>
> ### Add network serving capability to above brick.
> volume server
> type protocol/server
> option transport-type tcp/server # For TCP/IP transport
> # option transport-type ib-sdp/server # For Infiniband transport
> # option bind-address 192.168.1.10 # Default is to listen on all interfaces
> option listen-port 6996 # Default is 6996
> # option client-volume-filename /etc/glusterfs/glusterfs-client.vol
> subvolumes brick
> # NOTE: Access to any volume through protocol/server is denied by
> # default. You need to explicitly grant access through "auth" option.
> option auth.ip.brick.allow * # Allow access to "brick" volume
> end-volume
>
>
> volume posix3
> type storage/posix
> option directory /mnt/namespace
> end-volume
>
> volume io-threads1
> type performance/io-threads
> option thread-count 8
> subvolumes posix3
> end-volume
>
> volume brick-ns
> type features/posix-locks
> option mandatory on
> subvolumes io-threads1
> end-volume
>
> volume server
> type protocol/server
> option transport-type tcp/server
> option listen-port 6997
> subvolumes brick-ns
> option auth.ip.brick-ns.allow *
> end-volume
>
>
> Client spec:
> volume client1
> type protocol/client
> option transport-type tcp/client # for TCP/IP transport
> # option ibv-send-work-request-size 131072
> # option ibv-send-work-request-count 64
> # option ibv-recv-work-request-size 131072
> # option ibv-recv-work-request-count 64
> # option transport-type ib-sdp/client # for Infiniband transport
> # option transport-type ib-verbs/client # for ib-verbs transport
> option remote-host 10.253.59.241 # IP address of the remote brick
> # option remote-port 6996 # default server port is 6996
>
> # option transport-timeout 120 # seconds to wait for a reply from server for each request
> option remote-subvolume brick # name of the remote volume
> end-volume
>
> ### Add client feature and attach to remote subvolume
> volume client2
> type protocol/client
> option transport-type tcp/client # for TCP/IP transport
> # option ibv-send-work-request-size 131072
> # option ibv-send-work-request-count 64
> # option ibv-recv-work-request-size 131072
> # option ibv-recv-work-request-count 64
> # option transport-type ib-sdp/client # for Infiniband transport
> # option transport-type ib-verbs/client # for ib-verbs transport
> option remote-host 10.255.57.171 # IP address of the remote brick
> # option remote-port 6996 # default server port is 6996
>
> # option transport-timeout 120 # seconds to wait for a reply from server for each request
> option remote-subvolume brick # name of the remote volume
> end-volume
>
>
> volume client-ns
> type protocol/client
> option transport-type tcp/client # for TCP/IP transport
> option remote-host 10.255.57.171 # IP address of the remote brick
> option remote-port 6997 # default server port is 6996
> option remote-subvolume brick-ns # name of the remote volume
> end-volume
>
> #
> ## Add unify feature to cluster "client1" and "client2". Associate an
> ## appropriate scheduler that matches your I/O demand.
> volume bricks
> type cluster/unify
> option namespace client-ns # this will not be a storage child of unify.
> subvolumes client1 client2
>
> option scheduler alu
> option lock-node client1 # first child will be lock-node by default
> option alu.limits.min-free-disk 5 #%
> option alu.limits.max-open-files 10000
> option alu.order disk-usage:read-usage:write-usage:open-files-usage
> option alu.disk-usage.entry-threshold 2GB
> option alu.disk-usage.exit-threshold 128MB
> option alu.open-files-usage.entry-threshold 1024
> option alu.open-files-usage.exit-threshold 32
> option alu.read-usage.entry-threshold 20 #%
> option alu.read-usage.exit-threshold 4 #%
> option alu.write-usage.entry-threshold 20 #%
> option alu.write-usage.exit-threshold 4 #%
> option alu.stat-refresh.interval 10sec
> option alu.stat-refresh.num-file-create 10
> end-volume
> #
>
> ### Add writeback feature
> volume writeback
> type performance/write-behind
> option aggregate-size 131072 # unit in bytes
> subvolumes bricks
> end-volume
>
> ### Add readahead feature
> volume readahead
> type performance/read-ahead
> option page-size 65536 # unit in bytes
> option page-count 16 # cache per file = (page-count x page-size) = 16 x 65536 bytes = 1 MiB
> subvolumes writeback
> end-volume
>
>
>
> ----- Original Message -----
> From: "Harris Landgarten" <harrisl at lhjonline.com>
> To: "gluster-devel" <gluster-devel at nongnu.org>
> Sent: Friday, June 29, 2007 10:53:54 AM (GMT-0500) America/New_York
> Subject: Re: [Gluster-devel] brick crash/hang with io-threads in 2.5 patch 240
>
> The read tests passed, but the backup crashed both the brick and the client.
>
> Here is the backtrace from the brick that crashed:
>
> Program received signal SIGSEGV, Segmentation fault.
> [Switching to Thread -1269179504 (LWP 30452)]
> inode_forget (inode=0x8064038, nlookup=0) at list.h:92
> 92 prev->next = next;
> (gdb) bt
> #0 inode_forget (inode=0x8064038, nlookup=0) at list.h:92
> #1 0xb75c0d0a in posix_forget () from /usr/lib/glusterfs/1.3.0-pre5/xlator/storage/posix.so
> #2 0xb75b5676 in iot_forget_wrapper () from /usr/lib/glusterfs/1.3.0-pre5/xlator/performance/io-threads.so
> #3 0xb7f44f4a in call_resume_wind (stub=0x8064038) at call-stub.c:2027
> #4 0xb7f44fd7 in call_resume (stub=0x810bfd8) at call-stub.c:2763
> #5 0xb75b97a5 in iot_worker () from /usr/lib/glusterfs/1.3.0-pre5/xlator/performance/io-threads.so
> #6 0xb7f153db in start_thread () from /lib/libpthread.so.0
> #7 0xb7e9f26e in clone () from /lib/libc.so.6
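>
> For context, list.h:92 is the unlink step of a classic intrusive
> doubly-linked list. A minimal sketch of the idiom (the pointer names
> come from the quoted source line; everything else is assumed, not the
> exact GlusterFS source):
>
> struct list_head {
>         struct list_head *next;
>         struct list_head *prev;
> };
>
> /* Unlink 'entry' from whatever list it is on. */
> static inline void list_del (struct list_head *entry)
> {
>         struct list_head *prev = entry->prev;
>         struct list_head *next = entry->next;
>
>         next->prev = prev;
>         prev->next = next;  /* list.h:92 -- faults if entry's pointers
>                                were never initialized, or were poisoned
>                                by an earlier unlink or free */
> }
>
> Note that the stub in frame #3 (0x8064038) has the same address as the
> inode in frame #0, which is consistent with freed-and-reused memory.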
>
> Harris
>
> ----- Original Message -----
> From: "Basavanagowda Kanur" <gowda at zresearch.com>
> To: "Harris Landgarten" <harrisl at lhjonline.com>
> Cc: "Anand Avati" <avati at zresearch.com>, "gluster-devel" <
> gluster-devel at nongnu.org>
> Sent: Friday, June 29, 2007 9:36:17 AM (GMT-0500) America/New_York
> Subject: Re: [Gluster-devel] brick crash/hang with io-threads in 2.5 patch 240
>
> Harris,
> Please find the fix for the bug in patch-243.
>
> Thanks,
> gowda
>
>
> On 6/28/07, Harris Landgarten <harrisl at lhjonline.com> wrote:
>
> Avati,
>
> I managed to get a backtrace from the server by attaching to the process with gdb:
>
> 0xb7f60f38 in dict_set (this=0x8056fc8, key=0xb75d8fa3 "key", value=0x8056c90) at dict.c:124
> 124 for (pair = this->members[hashval]; pair != NULL; pair = pair->hash_next) {
> (gdb) bt
> #0 0xb7f60f38 in dict_set (this=0x8056fc8, key=0xb75d8fa3 "key", value=0x8056c90) at dict.c:124
> #1 0xb75cf36b in server_getxattr_cbk () from /usr/lib/glusterfs/1.3.0-pre5/xlator/protocol/server.so
> #2 0xb7f64d55 in default_getxattr_cbk (frame=0x8057228, cookie=0x8057740, this=0x804ffc0, op_ret=0, op_errno=13, dict=0x8056fc8) at defaults.c:1071
> #3 0xb7f6d462 in call_resume (stub=0x8056858) at call-stub.c:2469
> #4 0xb75e1770 in iot_reply () from /usr/lib/glusterfs/1.3.0-pre5/xlator/performance/io-threads.so
> #5 0xb7f3d3db in start_thread () from /lib/libpthread.so.0
> #6 0xb7ec726e in clone () from /lib/libc.so.6
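>
> For reference, dict.c:124 is walking one hash bucket of the dict. A
> sketch of the shape of that code (the struct layout here is assumed
> for illustration; only the loop line is verbatim from the backtrace):
>
> typedef struct _data_pair {
>         struct _data_pair *hash_next;
>         char              *key;
> } data_pair_t;
>
> typedef struct _dict {
>         int           hash_size;
>         data_pair_t **members;   /* array of bucket chains */
> } dict_t;
>
> static data_pair_t *
> bucket_walk (dict_t *this, int hashval)
> {
>         data_pair_t *pair;
>
>         /* dict.c:124 -- faults if 'this' was already freed (so
>          * members is garbage) or if another thread mutates the
>          * chain while we walk it. */
>         for (pair = this->members[hashval]; pair != NULL; pair = pair->hash_next) {
>                 /* key comparison elided */
>         }
>         return pair;
> }
>
> Since the reply is resumed from iot_reply on an io-threads worker, one
> plausible way to hit this is the dict being freed on another thread
> before the worker resumes the callback.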
>
> I hope this helps. Have you been able to reproduce it?
>
> Harris
>
> ----- Original Message -----
> From: "Anand Avati" < avati at zresearch.com >
> To: "Harris Landgarten" < harrisl at lhjonline.com >
> Cc: "gluster-devel" < gluster-devel at nongnu.org >
> Sent: Wednesday, June 27, 2007 8:09:13 AM (GMT-0500) America/New_York
> Subject: Re: [Gluster-devel] brick crash/hang with io-threads in 2.5 patch 240
>
> Is there a backtrace of the server available too? It would be of great help.
>
> thanks,
> avati
>
>
> 2007/6/27, Harris Landgarten <harrisl at lhjonline.com>:
>
> Whenever I enable io-threads in one of my bricks, I can cause a crash:
>
> in client1:
>
> ls -lR /mnt/glusterfs
>
> while this is running
>
> in client2:
>
> ls -l /mnt/glusterfs
> ls: /mnt/glusterfs/secondary: Transport endpoint is not connected
> total 4
> ?--------- ? ? ? ? ? /mnt/glusterfs/backups
> ?--------- ? ? ? ? ? /mnt/glusterfs/tmp
>
> At this point the brick with io-threads has crashed:
>
> 2007-06-27 07:45:55 C [common-utils.c:205:gf_print_trace] debug-backtrace:
> Got signal (11), printing backtrace
> 2007-06-27 07:45:55 C [common-utils.c:207:gf_print_trace] debug-backtrace:
> /usr/lib/libglusterfs.so.0(gf_print_trace+0x2d) [0xb7fabd4d]
> 2007-06-27 07:45:55 C [common-utils.c:207:gf_print_trace] debug-backtrace:
> [0xbfffe420]
> 2007-06-27 07:45:55 C [common-utils.c:207:gf_print_trace] debug-backtrace:
> /usr/lib/glusterfs/1.3.0-pre5/xlator/protocol/server.so [0xb761436b]
> 2007-06-27 07:45:55 C [common-utils.c:207:gf_print_trace] debug-backtrace:
> /usr/lib/libglusterfs.so.0 [0xb7fa9d55]
> 2007-06-27 07:45:55 C [common-utils.c:207:gf_print_trace] debug-backtrace:
> /usr/lib/libglusterfs.so.0(call_resume+0x4f2) [0xb7fb2462]
> 2007-06-27 07:45:55 C [common-utils.c:207:gf_print_trace] debug-backtrace:
> /usr/lib/glusterfs/1.3.0-pre5/xlator/performance/io-threads.so [0xb7626770]
> 2007-06-27 07:45:55 C [common-utils.c:207:gf_print_trace] debug-backtrace:
> /lib/libpthread.so.0 [0xb7f823db]
> 2007-06-27 07:45:55 C [common-utils.c:207:gf_print_trace] debug-backtrace:
> /lib/libc.so.6(clone+0x5e) [0xb7f0c26
>
>
> The brick is running on Fedora and it doesn't want to generate a core.
> Any suggestions?
>
> This is the spec file I used for the test
>
>
> ### Export volume "brick" with the contents of "/mnt/export" directory.
> volume posix1
> type storage/posix # POSIX FS translator
> option directory /mnt/export # Export this directory
> end-volume
>
> volume io-threads
> type performance/io-threads
> option thread-count 8
> subvolumes posix1
> end-volume
>
> ### Add POSIX record locking support to the storage brick
> volume brick
> type features/posix-locks
> option mandatory on # enables mandatory locking on all files
> subvolumes io-threads
> end-volume
>
>
> ### Add network serving capability to above brick.
> volume server
> type protocol/server
> option transport-type tcp/server # For TCP/IP transport
> # option transport-type ib-sdp/server # For Infiniband transport
> # option bind-address 192.168.1.10 # Default is to listen on all interfaces
> option listen-port 6996 # Default is 6996
> # option client-volume-filename /etc/glusterfs/glusterfs-client.vol
> subvolumes brick
> # NOTE: Access to any volume through protocol/server is denied by
> # default. You need to explicitly grant access through "auth" option.
> option auth.ip.brick.allow * # Allow access to "brick" volume
> end-volume
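>
> Both backtraces above go through the same hand-off: io-threads freezes
> each request into a call stub, queues it, and a worker thread later
> resumes it via call_resume(). A minimal sketch of that pattern (the
> queue layout and names here are assumed, not the actual GlusterFS
> implementation):
>
> #include <pthread.h>
> #include <stddef.h>
>
> typedef struct stub {
>         struct stub *next;
>         void (*resume) (struct stub *stub);  /* cf. call_resume() */
> } stub_t;
>
> static stub_t          *queue_head;
> static pthread_mutex_t  queue_lock = PTHREAD_MUTEX_INITIALIZER;
> static pthread_cond_t   queue_cond = PTHREAD_COND_INITIALIZER;
>
> static void *
> iot_worker_sketch (void *arg)
> {
>         (void) arg;
>         for (;;) {
>                 pthread_mutex_lock (&queue_lock);
>                 while (queue_head == NULL)
>                         pthread_cond_wait (&queue_cond, &queue_lock);
>                 stub_t *stub = queue_head;
>                 queue_head = stub->next;
>                 pthread_mutex_unlock (&queue_lock);
>
>                 /* The request runs here, on a different thread from
>                  * the caller; anything freed in the meantime is a
>                  * use-after-free by the time the stub is resumed. */
>                 stub->resume (stub);
>         }
>         return NULL;
> }
>
> That thread boundary would explain why the crashes only show up with
> io-threads loaded.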
>
>
>
> _______________________________________________
> Gluster-devel mailing list
> Gluster-devel at nongnu.org
> http://lists.nongnu.org/mailman/listinfo/gluster-devel
>
>
>
> --
> Anand V. Avati
>
--
Amar Tumballi
http://amar.80x25.org
[bulde on #gluster/irc.gnu.org]