[Gluster-users] glusterfs locks / pacemaker

lukas lukas.kostyan at gmail.com
Thu May 7 17:45:32 UTC 2015


Hi,

I have a two-node setup where a GlusterFS volume is mounted as the
document root of Apache. After producing a kernel panic on node 2 with
echo c > /proc/sysrq-trigger, node 2 is turned off by STONITH. The
problem is that Apache cannot start on the surviving node because
GlusterFS locks the volume for almost a minute, so Pacemaker marks the
resource with a score of -INF to prevent it from starting again. After
the volume is unlocked I have to manually clean up the resources (to
reset the score to 0, as sketched below) and start Apache again.
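
The manual recovery boils down to roughly the following (crmsh syntax,
resource names as in my config below):

  # clear the failcount / -INF ban so the clone may run again
  crm resource cleanup cl_apache

  # then start it if it does not come back on its own
  crm resource start cl_apache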

Why does GlusterFS hold the lock for so long, and is there a way to
reduce the lock time?
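
From what I have read, the client-side hang after a node failure is
governed by the GlusterFS volume option network.ping-timeout, which
defaults to 42 seconds and would roughly match the delay I see. If that
is right, something like this should shorten it (volume name gvolrep as
in the config below; I assume a lower timeout risks spurious disconnects
on a flaky network):

  # show current volume options (ping-timeout is listed once set explicitly)
  gluster volume info gvolrep

  # lower the client ping timeout from the 42s default to e.g. 10s
  gluster volume set gvolrep network.ping-timeout 10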

Below are the cluster status and configuration.
Setup: Pacemaker 1.1.7, Corosync 1.4.

Online: [ vm-1 vm-2 ]

   Clone Set: cl_gluster_mnt [p_gluster_mnt]
       Started: [ vm-1 vm-2 ]
   Clone Set: cl_apache [p_apache]
       Started: [ vm-1 vm-2 ]
   Clone Set: cl_IP [IP] (unique)
       IP:0       (ocf::heartbeat:IPaddr2):       Started vm-1
       IP:1       (ocf::heartbeat:IPaddr2):       Started vm-2
p_fence_N1      (stonith:external/libvirt):     Started vm-2
p_fence_N2      (stonith:external/libvirt):     Started vm-1

root at vm-1:~# crm configure show
node vm-1 \
      attributes standby="off"
node vm-2 \
      attributes standby="off"
primitive IP ocf:heartbeat:IPaddr2 \
      params ip="192.168.122.200" nic="eth0" clusterip_hash="sourceip-sourceport" \
      op monitor interval="10s"
primitive p_apache ocf:heartbeat:apache \
      params configfile="/etc/apache2/apache2.conf" statusurl="http://localhost/server-status" \
      op monitor interval="60" timeout="20" \
      op start interval="0" timeout="40s" start-delay="0" \
      meta is-managed="true"
primitive p_fence_N1 stonith:external/libvirt \
      params hostlist="vm-1:N1" hypervisor_uri="qemu+tcp://192.168.122.1/system" pcmk_reboot_action="off" \
      op monitor interval="60" \
      meta target-role="Started"
primitive p_fence_N2 stonith:external/libvirt \
      params hostlist="vm-2:N2" hypervisor_uri="qemu+tcp://192.168.122.1/system" pcmk_reboot_action="off" \
      op monitor interval="60"
primitive p_gluster_mnt ocf:heartbeat:Filesystem \
      params device="localhost:/gvolrep" directory="/var/www/html" fstype="glusterfs" \
      op monitor interval="10"
clone cl_IP IP \
      meta globally-unique="true" clone-max="2" clone-node-max="2" \
      params resource-stickiness="0"
clone cl_apache p_apache \
      meta target-role="Started"
clone cl_gluster_mnt p_gluster_mnt \
      meta target-role="Started"
location l_fence_N1 p_fence_N1 -inf: vm-1
location l_fence_N2 p_fence_N2 -inf: vm-2
colocation c_apache_gluster inf: cl_IP cl_gluster_mnt
colocation c_ip_apache inf: cl_apache cl_IP
order o_apache inf: cl_gluster_mnt cl_IP cl_apache
property $id="cib-bootstrap-options" \
      dc-version="1.1.7-ee0730e13d124c3d58f00016c3376a1de5323cff" \
      cluster-infrastructure="openais" \
      expected-quorum-votes="2" \
      stonith-enabled="true" \
      no-quorum-policy="ignore" \
      last-lrm-refresh="1430996556"
rsc_defaults $id="rsc-options" \
      resource-stickiness="100"
op_defaults $id="op-options" \
      timeout="240s"
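
One thing I notice in my own config: the p_apache start timeout (40s) is
shorter than the ~1 minute the volume stays locked, so the first start
attempt is bound to fail. If lowering the ping timeout is not enough, I
am considering letting Pacemaker retry instead of banning the node,
roughly like this (crmsh syntax; the 120s value is just a guess):

  # do not treat a single failed start as fatal (-INF) for the node
  crm configure property start-failure-is-fatal=false

  # and/or let recorded failures expire so the ban lifts automatically
  crm resource meta p_apache set failure-timeout 120s

  # raising the start timeout above the lock window may also help,
  # e.g. op start interval="0" timeout="90s" on p_apache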


