[Gluster-devel] segfault with tla 846 - suggestions?

Dan Parsons dparsons at nyip.net
Fri Jan 16 15:58:07 UTC 2009


Avati spent some time with me yesterday going through two different cores. He says he has all the info he needs now.

Yes, it is very easy to reproduce. It takes about 15 minutes.
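
For reference, here is a rough sketch of the small-file workload that triggers it. The mount point, file count, and sizes are illustrative, not the exact job; the real one also ends up deleting files, which matches the unlink frames in the backtrace below.

# create a few thousand tiny files on the glusterfs mount, read them back,
# then delete them (adjust /mnt/glusterfs to the actual mount point)
mkdir -p /mnt/glusterfs/smallfile-test
for i in $(seq 1 5000); do
    dd if=/dev/zero of=/mnt/glusterfs/smallfile-test/f$i bs=4k count=1 2>/dev/null
done
for i in $(seq 1 5000); do
    cat /mnt/glusterfs/smallfile-test/f$i > /dev/null
done
rm -rf /mnt/glusterfs/smallfile-test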

Dan Parsons

-----Original Message-----
From: "Krishna Srinivas" <krishna at zresearch.com>

Date: Fri, 16 Jan 2009 18:05:56 
To: Dan Parsons<dparsons at nyip.net>
Cc: Gluster Developers Discussion List<gluster-devel at nongnu.org>
Subject: Re: [Gluster-devel] segfault with tla 846 - suggestions?


Dan,

Will investigate as soon as possible. Can you paste the backtrace using gdb?
"gdb -c <path to core file> glusterfs" and then type "bt".

Is this problem easily reproducible?

Krishna

On Fri, Jan 16, 2009 at 5:49 AM, Dan Parsons <dparsons at nyip.net> wrote:
> I just had the glusterfs client crash on a bunch of nodes. The I/O profile
> was basically many small reads of many tiny files. There was nothing of
> significance in the server log files (just notices of client disconnects).
> The testing I did last night was a lot of dd if=file of=/dev/null reads of
> huge files; those worked just fine. Any suggestions on how to fix this would
> be greatly appreciated.
>
> Crash message:
>
> pending frames:
> frame : type(1) op(STAT)
> frame : type(1) op(STAT)
> frame : type(1) op(FLUSH)
> frame : type(1) op(FLUSH)
> frame : type(1) op(OPEN)
> frame : type(1) op(WRITE)
> frame : type(1) op(UNLINK)
>
> patchset: glusterfs--mainline--3.0--patch-846
> signal received: 11
> configuration details:argp 1
> backtrace 1
> db.h 1
> dlfcn 1
> fdatasync 1
> libpthread 1
> llistxattr 1
> setfsid 1
> spinlock 1
> epoll.h 1
> xattr.h 1
> st_atim.tv_nsec 1
> package-string: glusterfs 1.4.0tla846
> /lib64/libc.so.6[0x384f2301b0]
> /lib64/libpthread.so.0(pthread_mutex_lock+0x19)[0x384fe08309]
> /usr/local/lib/libglusterfs.so.0(inode_unlink+0x2c)[0x2b58737fa85c]
> /usr/local/lib/glusterfs/1.4.0tla846/xlator/mount/fuse.so[0x2b587470bb92]
> /usr/local/lib/glusterfs/1.4.0tla846/xlator/features/filter.so[0x2b58744f97e0]
> /usr/local/lib/libglusterfs.so.0[0x2b58737f2e60]
> /usr/local/lib/glusterfs/1.4.0tla846/xlator/cluster/unify.so(unify_unlink_cbk+0x96)[0x2b58740e2566]
> /usr/local/lib/glusterfs/1.4.0tla846/xlator/protocol/client.so(client_unlink_cbk+0x85)[0x2b5873aa4c45]
> /usr/local/lib/glusterfs/1.4.0tla846/xlator/protocol/client.so(protocol_client_pollin+0xc1)[0x2b5873a9fff1]
> /usr/local/lib/glusterfs/1.4.0tla846/xlator/protocol/client.so(notify+0x13a)[0x2b5873aa64ea]
> /usr/local/lib/glusterfs/1.4.0tla846/transport/socket.so[0x2aaaaaaadd8e]
> /usr/local/lib/libglusterfs.so.0[0x2b5873804b55]
> /usr/local/sbin/glusterfs(main+0x948)[0x403368]
> /lib64/libc.so.6(__libc_start_main+0xf4)[0x384f21d8b4]
> /usr/local/sbin/glusterfs[0x402099]
>
> Client config:
> volume unify-switch-ns
>   type protocol/client
>   option transport-type tcp
>   option remote-host 10.8.101.51
>   option remote-subvolume posix-unify-switch-ns
> end-volume
>
> #volume distfs01-ns-readahead
> #   type performance/read-ahead
> #   option page-size 1MB
> #   option page-count 8
> #   subvolumes distfs01-ns-brick
> #end-volume
>
> #volume unify-switch-ns
> #   type performance/write-behind
> #   option block-size 1MB
> #   option cache-size 3MB
> #   subvolumes distfs01-ns-readahead
> #end-volume
>
> volume distfs01-unify
>   type protocol/client
>   option transport-type tcp
>   option remote-host 10.8.101.51
>   option remote-subvolume posix-unify
> end-volume
>
> volume distfs02-unify
>   type protocol/client
>   option transport-type tcp
>   option remote-host 10.8.101.52
>   option remote-subvolume posix-unify
> end-volume
>
> volume distfs03-unify
>   type protocol/client
>   option transport-type tcp
>   option remote-host 10.8.101.53
>   option remote-subvolume posix-unify
> end-volume
>
> volume distfs04-unify
>   type protocol/client
>   option transport-type tcp
>   option remote-host 10.8.101.54
>   option remote-subvolume posix-unify
> end-volume
>
> volume distfs01-stripe
>   type protocol/client
>   option transport-type tcp
>   option remote-host 10.8.101.51
>   option remote-subvolume posix-stripe
> end-volume
>
> volume distfs02-stripe
>   type protocol/client
>   option transport-type tcp
>   option remote-host 10.8.101.52
>   option remote-subvolume posix-stripe
> end-volume
>
> volume distfs03-stripe
>   type protocol/client
>   option transport-type tcp
>   option remote-host 10.8.101.53
>   option remote-subvolume posix-stripe
> end-volume
>
> volume distfs04-stripe
>   type protocol/client
>   option transport-type tcp
>   option remote-host 10.8.101.54
>   option remote-subvolume posix-stripe
> end-volume
>
> volume stripe0
>        type cluster/stripe
>        option block-size *.jar,*.pin:1MB,*:2MB
>        subvolumes distfs01-stripe distfs02-stripe distfs03-stripe distfs04-stripe
> end-volume
>
> volume dht0
>        type cluster/dht
>        subvolumes distfs01-unify distfs02-unify distfs03-unify distfs04-unify
> end-volume
>
> volume unify
>        type cluster/unify
>        option namespace unify-switch-ns
>        option self-heal off
>        option scheduler switch
> # send *.phr/psq/pnd etc to stripe0, send the rest to hash
> # extensions have to be *.foo* and not simply *.foo, or rsync's tmp file
> # naming will prevent files from being matched
>        option scheduler.switch.case *.phr*:stripe0;*.psq*:stripe0;*.pnd*:stripe0;*.psd*:stripe0;*.pin*:stripe0;*.nsi*:stripe0;*.nin*:stripe0;*.nsd*:stripe0;*.nhr*:stripe0;*.nsq*:stripe0;*.tar*:stripe0;*.tar.gz*:stripe0;*.jar*:stripe0;*.img*:stripe0;*.perf*:stripe0;*.tgz*:stripe0;*.fasta*:stripe0;*.huge*:stripe0
>        subvolumes stripe0 dht0
> end-volume
>
> volume ioc
>   type performance/io-cache
>   subvolumes unify
>   option cache-size 5000MB
> end-volume
>
> volume filter
>  type features/filter
>  option fixed-uid 0
>  option fixed-gid 900
>  subvolumes ioc
> end-volume
>
>
> Server config:
>
> volume posix-unify-brick
>        type storage/posix
>        option directory /distfs-storage-space/glusterfs/unify
> # the below line is here to make the output of 'df' accurate, as both
> # volumes are served from the same local drive
>        option export-statfs-size off
> end-volume
>
> volume posix-stripe-brick
>        type storage/posix
>        option directory /distfs-storage-space/glusterfs/stripe
> end-volume
>
> volume posix-unify-switch-ns-brick
>        type storage/posix
>        option directory /distfs-storage-space/glusterfs/unify-switch-ns
> end-volume
>
> volume posix-unify
>        type performance/io-threads
>        option thread-count 4
>        subvolumes posix-unify-brick
> end-volume
>
> volume posix-stripe
>        type performance/io-threads
>        option thread-count 4
>        subvolumes posix-stripe-brick
> end-volume
>
> volume posix-unify-switch-ns
>        type performance/io-threads
>        option thread-count 2
>        subvolumes posix-unify-switch-ns-brick
> end-volume
>
> volume server
>        type protocol/server
>        option transport-type tcp
>        option auth.addr.posix-unify.allow 10.8.101.*
>        option auth.addr.posix-stripe.allow 10.8.101.*
>        option auth.addr.posix-unify-switch-ns.allow 10.8.101.*
>        subvolumes posix-unify posix-stripe posix-unify-switch-ns
> end-volume
>
>
> Dan Parsons
>
>
>
>
> _______________________________________________
> Gluster-devel mailing list
> Gluster-devel at nongnu.org
> http://lists.nongnu.org/mailman/listinfo/gluster-devel
>


