[Gluster-users] Unify very slow for 2000 query to cluster / s

Andrew McGill list2008 at lunch.za.net
Fri Nov 7 08:05:32 UTC 2008


On Friday 17 October 2008 06:00:08 Duc Le Minh wrote:
> I built a cluster with GlusterFS 1.3, with 3 nodes as bricks:
>
> Xeon QuadCore 2.33GHz, 32G RAM, 3G network (bonding), RAID 6 with 12x 1TB HDDs
> + 2 hot spares
>
> On this cluster I have 3M files, 16TB in total.
>
> Clients:
> 5 nodes, Xeon QuadCore 2.33GHz, 8G RAM, 2G network (bonding), running lighttpd
> with 600 concurrent connections for streaming FLV.
>
>
> Streaming at 5Gbps works fine, but if I run 'ls -l' in a directory on the
> glusterfs mount, the whole system becomes very slow.
>
> Please help me find a solution.
If you run ls -l, that does a stat() of each file in the directory.
By comparison, echo * is lightning fast, since it only has to do a readdir()
from the namespace node.  You won't see the delay on a low-latency network, but
in the real-world (non-developer) case the per-file round trips add up, and it
is sloooooooooooooooow.
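
In other words, roughly this (a Python sketch; the mount point is made up):

    import os

    MOUNT = "/mnt/glusterfs/videos"   # hypothetical mount point

    # "echo *" equivalent: a single readdir() pass, no per-file metadata
    names = os.listdir(MOUNT)

    # "ls -l" equivalent: one extra stat() round trip per entry, so the
    # cost grows as (number of files) x (network latency)
    sizes = {n: os.stat(os.path.join(MOUNT, n)).st_size for n in names}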

A solution (short of reducing network latency) would be threaded versions of
the filesystem tools which work with multiple files, so that they can copy, move
and stat files in parallel, and benefit from filesystem parallelism --

	export GNU_COREUTILS_THREADS=8
	cp   # simultaneous read() and write() sessions
	mv   # simultaneous link and unlink calls
	ls   # parallel stat()
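
Nothing like GNU_COREUTILS_THREADS exists in coreutils today, but as a rough
sketch of the idea, a toy "parallel ls -l" could look like this (Python; the
thread count is arbitrary):

    import os, sys
    from concurrent.futures import ThreadPoolExecutor

    def parallel_stat(directory, threads=8):
        # stat() every entry from a thread pool, so the per-file network
        # round trips overlap instead of running one after another.
        names = os.listdir(directory)
        paths = [os.path.join(directory, n) for n in names]
        with ThreadPoolExecutor(max_workers=threads) as pool:
            return dict(zip(names, pool.map(os.stat, paths)))

    if __name__ == "__main__":
        for name, st in sorted(parallel_stat(sys.argv[1]).items()):
            print("%12d  %s" % (st.st_size, name))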

One could also optimise the text utilities like cat by doing the open() and
stat() operations in parallel and in the background -- userspace read-ahead
caching.  All of the utilities which process multiple filenames could get
better speed from this -- rm, cat, chown, chmod ... even tail, head, wc.
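
For example, a multi-file cat could start the open()/stat() for the later
files in background threads while the first one is still being copied out --
again only a sketch:

    import os, sys, threading

    def warm(path):
        # Pay the open()/stat() round trips for a later file ahead of time
        # and pull the first chunk into the cache.
        try:
            with open(path, "rb") as f:
                os.fstat(f.fileno())
                f.read(1 << 17)
        except OSError:
            pass

    def cat(paths):
        for p in paths[1:]:
            threading.Thread(target=warm, args=(p,), daemon=True).start()
        for p in paths:
            with open(p, "rb") as f:
                sys.stdout.buffer.write(f.read())

    if __name__ == "__main__":
        cat(sys.argv[1:])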

I suspect that one could produce better performance even on a local filesystem
with some well-placed threading in the utilities.

> This Server Config:
> ##############################################
> ###  GlusterFS Server Volume Specification  ##
> ##############################################
>
>
> # Unify Volume 1
> volume unify-raw1
>   type storage/posix
>   option directory /home/node1/unify/baamboo/data
> end-volume
>
> volume u-posix-locks1
>   type features/posix-locks
>   option mandatory on
>   subvolumes unify-raw1
> end-volume
>
> volume u-io-thr1
>   type performance/io-threads
>   option thread-count 4
>   option cache-size 128MB
>   subvolumes u-posix-locks1
> end-volume
>
> volume u-wb1
>   type performance/write-behind
>   option aggregate-size 1MB # default is 0bytes
>   option flush-behind on    # default is 'off'
>   subvolumes u-io-thr1
> end-volume
>
> volume unify1
>   type performance/read-ahead
>   option page-size 512kB        # 256KB is the default option
>   option page-count 64           # 2 is default option
>   option force-atime-update off # default is off
>   subvolumes u-wb1
> end-volume
>
>
> # Unify Volume 2
> volume unify-raw2
>   type storage/posix
>   option directory /home/node2/unify/baamboo/data
> end-volume
>
> volume u-posix-locks2
>   type features/posix-locks
>   option mandatory on
>   subvolumes unify-raw2
> end-volume
>
> volume u-io-thr2
>   type performance/io-threads
>   option thread-count 4
>   option cache-size 128MB
>   subvolumes u-posix-locks2
> end-volume
>
> volume u-wb2
>   type performance/write-behind
>   option aggregate-size 1MB # default is 0bytes
>   option flush-behind on    # default is 'off'
>   subvolumes u-io-thr2
> end-volume
>
> volume unify2
>   type performance/read-ahead
>   option page-size 512kB        # 256KB is the default option
>   option page-count 64           # 2 is default option
>   option force-atime-update off # default is off
>   subvolumes u-wb2
> end-volume
>
>
>
>
> volume ns-raw
>   type storage/posix
>   option directory /home/node1/unify/baamboo/ns
> end-volume
>
> volume ns-io-thr
>   type performance/io-threads
>   option thread-count 4
>   option cache-size 32MB
>   subvolumes ns-raw
> end-volume
>
> volume ns
>   type performance/read-ahead
>   option page-size 256kB        # 256KB is the default option
>   option page-count 16           # 2 is default option
>   option force-atime-update off # default is off
>   subvolumes ns-io-thr
> end-volume
>
>
>
> ### Add network serving capability to above brick.
> volume server
>   type protocol/server
>   option transport-type tcp/server     # For TCP/IP transport
>   option bind-address 192.168.6.6      # Default is to listen on all interfaces
>   option listen-port 60001             # Default is 6996
>   subvolumes unify1 unify2 ns
>   option auth.ip.unify1.allow 192.168.* # Allow access to "brick" volume
>   option auth.ip.unify2.allow 192.168.* # Allow access to "brick" volume
>   option auth.ip.ns.allow 192.168.* # Allow access to "brick" volume
> end-volume
>
>
> Client Config
>
> ### file: client-volume.spec.sample
>
>
> ##############################################
> ###  GlusterFS Client Volume Specification  ##
> ##############################################
>
> ### Add client feature and attach to remote subvolume
> # volume client
> #  type protocol/client
> #  option transport-type tcp/client     # for TCP/IP transport
> # option ib-verbs-work-request-send-size  1048576
> # option ib-verbs-work-request-send-count 16
> # option ib-verbs-work-request-recv-size  1048576
> # option ib-verbs-work-request-recv-count 16
> # option transport-type ib-sdp/client  # for Infiniband transport
> # option transport-type ib-verbs/client # for ib-verbs transport
> #  option remote-host 127.0.0.1         # IP address of the remote brick
> # option remote-port 6996              # default server port is 6996
>
> # option transport-timeout 30          # seconds to wait for a reply
>                                        # from server for each request
> #  option remote-subvolume brick        # name of the remote volume
> # end-volume
>
>
> # Volume for Node 1
> volume unify_1
>   type protocol/client
>   option transport-type tcp/client
>   option remote-host 192.168.6.6
>   option remote-port 60001
>   option remote-subvolume unify1
>   option transport-timeout 600          # seconds to wait for a reply
> end-volume
>
> # Volume for Node 2
> volume unify_2
>   type protocol/client
>   option transport-type tcp/client
>   option remote-host 192.168.6.6
>   option remote-port 60001
>   option remote-subvolume unify2
>   option transport-timeout 600          # seconds to wait for a reply
> end-volume
>
> # Volume for Node 3
> volume unify_3
>   type protocol/client
>   option transport-type tcp/client
>   option remote-host 192.168.6.8
>   option remote-port 60001
>   option remote-subvolume unify1
>   option transport-timeout 600          # seconds to wait for a reply
> end-volume
>
> # Volume for Node 4
> volume unify_4
>   type protocol/client
>   option transport-type tcp/client
>   option remote-host 192.168.6.8
>   option remote-port 60001
>   option remote-subvolume unify2
>   option transport-timeout 600          # seconds to wait for a reply
> end-volume
>
>
> # Volume for Node 5
> volume unify_5
>   type protocol/client
>   option transport-type tcp/client
>   option remote-host 192.168.6.4
>   option remote-port 60001
>   option remote-subvolume unify1
>   option transport-timeout 600          # seconds to wait for a reply
> end-volume
>
>
> # Volume for Node 6
> volume unify_6
>   type protocol/client
>   option transport-type tcp/client
>   option remote-host 192.168.6.4
>   option remote-port 60001
>   option remote-subvolume unify2
>   option transport-timeout 600          # seconds to wait for a reply
> end-volume
>
>
> # Volume for Node NS
> # volume unify_ns1
> #   type protocol/client
> #   option transport-type tcp/client
> #   option remote-host 192.168.6.6
> #   option remote-port 60001
> #   option remote-subvolume ns
> #   option transport-timeout 600          # seconds to wait for a reply
> # end-volume
>
> # Volume for Node NS
> volume unify_ns_raw # unify_ns2
>   type protocol/client
>   option transport-type tcp/client
>   option remote-host 192.168.6.8
>   option remote-port 60001
>   option remote-subvolume ns
>   option transport-timeout 600          # seconds to wait for a reply
> end-volume
>
> # Volume for Node NS
> # volume unify_ns3
> #   type protocol/client
> #   option transport-type tcp/client
> #   option remote-host 192.168.6.4
> #   option remote-port 60001
> #   option remote-subvolume ns
> #   option transport-timeout 600          # seconds to wait for a reply
> # end-volume
>
> # Volume AFR node 1-2-3
> # volume unify_ns_raw
> #   type cluster/afr
> #   subvolumes unify_ns1 unify_ns2 unify_ns3
> # end-volume
>
>
> volume ns_iot
>   type performance/io-threads
>   option thread-count 4
>   option cache-size 256MB
>   subvolumes unify_ns_raw
> end-volume
>
>
> # Add readahead feature
> volume ns_readahead
>   type performance/read-ahead
>   option page-size 128kB     # unit in bytes
>   option page-count 16       # cache per file  = (page-count x page-size)
>   subvolumes ns_iot
> end-volume
>
> volume unify_ns  # ns_ioc
>   type performance/io-cache
>   option cache-size 128MB             # default is 32MB
>   option page-size 128KB               #128KB is default option
>   # option priority *.h:3,*.html:2,*:1 # default is '*:0'
>   option force-revalidate-timeout 2  # default is 1
>   subvolumes ns_readahead
> end-volume
>
>
>
> volume unify
>   type cluster/unify
>   subvolumes unify_1 unify_2 unify_3 unify_4  unify_5 unify_6
>   option namespace unify_ns
>   option scheduler rr
>   option rr.limits.min-free-disk 25%
>   option rr.refresh-interval 10
> end-volume
>
> volume iot
>   type performance/io-threads
>   option thread-count 4
>   option cache-size 256MB
>   subvolumes unify
> end-volume
>
> volume wb
>   type performance/write-behind
>   option aggregate-size 1MB # default is 0bytes
>   option flush-behind on # default is 'off'
>   subvolumes iot
> end-volume
>
> volume readahead
>   type performance/read-ahead
>   option page-size 512kB     # unit in bytes
>   option page-count 64       # cache per file  = (page-count x page-size)
>   subvolumes wb
> end-volume
>
> volume ioc
>   type performance/io-cache
>   option cache-size 1024MB             # default is 32MB
>   option page-size 1MB               #128KB is default option
>   # option priority *.h:3,*.html:2,*:1 # default is '*:0'
>   option force-revalidate-timeout 2  # default is 1
>   subvolumes readahead
> end-volume
>
>
> Thanks!





