[Gluster-users] Horrible performance with small files (DHT/AFR)

Benjamin Krein superbenk at superk.org
Fri May 29 14:36:29 UTC 2009


I'm seeing extremely poor performance writing small files to a  
glusterfs DHT/AFR mount point.  Here are the stats I'm seeing:

* Number of files:
root@dev1|/home/aweber/cache|# find |wc -l
102440

* Average file size (bytes):
root@dev1|/home/aweber/cache|# ls -lR | awk '{sum += $5; n++;} END {print sum/n;}'
4776.47

* Using scp:
root@dev1|/home/aweber/cache|# time scp -rp * benk@cfs1:~/cache/

real	1m38.726s
user	0m12.173s
sys	0m12.141s

* Using cp to glusterfs mount point:
root@dev1|/home/aweber/cache|# time cp -rp * /mnt

real	30m59.101s
user	0m1.296s
sys	0m5.820s

Here is my configuration (currently a single client writing to 4
servers: 2 DHT subvolumes, each replicated across 2 servers with AFR):

SERVER:

# webform flat-file cache

volume webform_cache
    type storage/posix
    option directory /home/clusterfs/webform/cache
end-volume

volume webform_cache_locks
    type features/locks
    subvolumes webform_cache
end-volume

volume webform_cache_brick
    type performance/io-threads
    option thread-count 32
    option max-threads 128
    option autoscaling on
    subvolumes webform_cache_locks
end-volume

<<snip>>

# GlusterFS Server
volume server
    type protocol/server
    option transport-type tcp
    subvolumes dns_public_brick dns_private_brick webform_usage_brick webform_cache_brick wordpress_uploads_brick subs_exports_brick
    option auth.addr.dns_public_brick.allow 10.1.1.*
    option auth.addr.dns_private_brick.allow 10.1.1.*
    option auth.addr.webform_usage_brick.allow 10.1.1.*
    option auth.addr.webform_cache_brick.allow 10.1.1.*
    option auth.addr.wordpress_uploads_brick.allow 10.1.1.*
    option auth.addr.subs_exports_brick.allow 10.1.1.*
end-volume

CLIENT:

# Webform Flat-File Cache Volume client configuration

volume srv1
	type protocol/client
	option transport-type tcp
	option remote-host cfs1
	option remote-subvolume webform_cache_brick
end-volume

volume srv2
	type protocol/client
	option transport-type tcp
	option remote-host cfs2
	option remote-subvolume webform_cache_brick
end-volume

volume srv3
	type protocol/client
	option transport-type tcp
	option remote-host cfs3
	option remote-subvolume webform_cache_brick
end-volume

volume srv4
	type protocol/client
	option transport-type tcp
	option remote-host cfs4
	option remote-subvolume webform_cache_brick
end-volume

volume afr1
	type cluster/afr
	subvolumes srv1 srv3
end-volume

volume afr2
	type cluster/afr
	subvolumes srv2 srv4
end-volume

volume dist
	type cluster/distribute
	subvolumes afr1 afr2
end-volume

volume writebehind
	type performance/write-behind
	option cache-size 4mb
    option flush-behind on
	subvolumes dist
end-volume

volume cache
	type performance/io-cache
	option cache-size 512mb
	subvolumes writebehind
end-volume

Benjamin Krein
www.superk.org







More information about the Gluster-users mailing list