<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<style><!--
/* Font Definitions */
@font-face
        {font-family:Wingdings;
        panose-1:5 0 0 0 0 0 0 0 0 0;}
@font-face
        {font-family:SimSun;
        panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:SimSun;
        panose-1:2 1 6 0 3 1 1 1 1 1;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0cm;
        margin-bottom:.0001pt;
        text-align:justify;
        text-justify:inter-ideograph;
        font-size:10.5pt;
        font-family:"Calibri",sans-serif;}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:#0563C1;
        text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
        {mso-style-priority:99;
        color:#954F72;
        text-decoration:underline;}
span.EmailStyle17
        {mso-style-type:personal-compose;
        font-family:"Calibri",sans-serif;
        color:windowtext;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-family:"Calibri",sans-serif;}
/* Page Definitions */
@page WordSection1
        {size:612.0pt 792.0pt;
        margin:72.0pt 90.0pt 72.0pt 90.0pt;}
div.WordSection1
        {page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang="ZH-CN" link="#0563C1" vlink="#954F72" style="text-justify-trim:punctuation">
<div class="WordSection1">
<p class="MsoNormal"><span lang="EN-US">Hi,<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">Could you help to check this coredump?<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">We are using glusterfs 3.12-3 (a solution with 3 replicated bricks) for stability testing: we do I/O while keeping the CPU load high (around 80%) with stress.<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">After several hours, a coredump happened on the glusterfs side.<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">[Current thread is 1 (Thread 0x7ffff37d2700 (LWP 3696))]<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">Missing separate debuginfos, use: dnf debuginfo-install rcp-pack-glusterfs-1.8.1_11_g99e9ca6-RCP2.wf28.x86_64<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">(gdb) bt<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">#0 0x00007ffff0d5c845 in wb_fulfill (wb_inode=0x7fffd406b3b0, liabilities=0x7fffdc234b50) at write-behind.c:1148<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">#1 0x00007ffff0d5e4d5 in wb_process_queue (wb_inode=0x7fffd406b3b0) at write-behind.c:1718<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">#2 0x00007ffff0d5eda7 in wb_writev (frame=0x7fffe0086290, this=0x7fffec014b00, fd=0x7fffe4034070, vector=0x7fffdc445720, count=1, offset=67108863, flags=32770, iobref=0x7fffdc00d550, xdata=0x0)<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> at write-behind.c:1825<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">#3 0x00007ffff0b51fcb in du_writev_resume (ret=0, frame=0x7fffdc0305a0, opaque=0x7fffdc0305a0) at disk-usage.c:490<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">#4 0x00007ffff7b3510d in synctask_wrap () at syncop.c:377<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">#5 0x00007ffff60d0660 in ?? () from /lib64/libc.so.6<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">#6 0x0000000000000000 in ?? ()<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">(gdb) p wb_inode<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">$1 = (wb_inode_t *) 0x7fffd406b3b0<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">(gdb) frame 2<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">#2 0x00007ffff0d5eda7 in wb_writev (frame=0x7fffe0086290, this=0x7fffec014b00, fd=0x7fffe4034070, vector=0x7fffdc445720, count=1, offset=67108863, flags=32770, iobref=0x7fffdc00d550, xdata=0x0)<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> at write-behind.c:1825<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">1825 in write-behind.c<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">(gdb) p *fd<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">$2 = {pid = 18154, flags = 32962, <span style="color:red">
refcount = 0,</span> inode_list = {next = 0x7fffe4034080, prev = 0x7fffe4034080}, inode = 0x0, lock = {spinlock = 0, mutex = {__data = {__lock = 0, __count = 0, __owner = 0,
<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> __nusers = 0, __kind = -1, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' &lt;repeats 16 times&gt;, "\377\377\377\377", '\000' &lt;repeats 19 times&gt;, __align = 0}},
<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> _ctx = 0x7fffe4022930, xl_count = 17, lk_ctx = 0x7fffe40350e0, anonymous = _gf_false}<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">(gdb) p fd<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">$3 = (fd_t *) 0x7fffe4034070<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">(gdb) p wb_inode->this<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">$1 = (xlator_t *) 0xffffffffffffff00<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">After adding test logs, I found that the FOP sequence on the write-behind xlator side was out of order, as shown below. On the FUSE side the FLUSH comes after write2, but on the WB side the FLUSH arrives between write2's 'wb_do_unwinds' and 'wb_fulfill'.<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">So I think this is the problem. It is possible that the FLUSH and the later RELEASE operation destroy the fd, which would explain the invalid 'wb_inode-&gt;this' value (0xffffffffffffff00). Do you agree?<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">I also think that synctask_new in our newly added disk-usage xlator delays the write operation, while the FLUSH operation has no such delay (because it does not go through the disk-usage xlator).<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">Do you agree with my speculation? And how can we fix it? (We don't want to move the disk-usage xlator.)<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">Problematic FOP sequence :<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">FUSE side: WB side:<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> <o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">Write 1 write1<o:p></o:p></span></p>
<p class="MsoNormal" style="text-indent:115.5pt"><span lang="EN-US">Write2 do unwind<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">Write 2 FLUSH<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> Release(destroy fd)<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">FLUSH <span style="color:red">
write2</span> (wb_fulfill) then coredump.<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">Release<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">int<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities)<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">{<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> wb_request_t *req = NULL;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> wb_request_t *head = NULL;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> wb_request_t *tmp = NULL;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> wb_conf_t *conf = NULL;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> off_t expected_offset = 0;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> size_t curr_aggregate = 0;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> size_t vector_count = 0;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> int ret = 0;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> <span style="color:red">conf = wb_inode->this->private;
</span></span><span lang="EN-US" style="color:red">&#8594;</span><span lang="EN-US" style="color:red"> the coredump happens on this line</span><span lang="EN-US"><o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> list_for_each_entry_safe (req, tmp, liabilities, winds) {<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> list_del_init (&req->winds);<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">….<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">volume ccs-write-behind<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">68: type performance/write-behind<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">69: subvolumes ccs-dht<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">70: end-volume<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">71: <o:p></o:p></span></p>
<p class="MsoNormal"><b><span lang="EN-US"> 72: volume ccs-disk-usage
</span></b><b><span lang="EN-US">&#8594;</span><span lang="EN-US"> we added a new xlator here for the write op, just to check whether the disk is full; it uses synctask_new for writes.<o:p></o:p></span></b></p>
<p class="MsoNormal"><b><span lang="EN-US">73: type performance/disk-usage<o:p></o:p></span></b></p>
<p class="MsoNormal"><b><span lang="EN-US">74: subvolumes ccs-write-behind<o:p></o:p></span></b></p>
<p class="MsoNormal"><b><span lang="EN-US">75: end-volume<o:p></o:p></span></b></p>
<p class="MsoNormal"><span lang="EN-US">76: <o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> 77: volume ccs-read-ahead<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">78: type performance/read-ahead<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">79: subvolumes ccs-disk-usage<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">80: end-volume<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">P.S. Part of our new translator code:<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">int<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">du_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> struct iovec *vector, int count, off_t off, uint32_t flags,<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> struct iobref *iobref, dict_t *xdata)<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">{<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> int op_errno = -1;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> int ret = -1;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> du_local_t *local = NULL;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> loc_t tmp_loc = {0,};<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> VALIDATE_OR_GOTO (frame, err);<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> VALIDATE_OR_GOTO (this, err);<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> VALIDATE_OR_GOTO (fd, err);<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> tmp_loc.gfid[15] = 1;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> tmp_loc.inode = fd->inode;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> tmp_loc.parent = fd->inode;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> local = du_local_init (frame, &tmp_loc, fd, GF_FOP_WRITE);<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> if (!local) {<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> op_errno = ENOMEM;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> goto err;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> }<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> local->vector = iov_dup (vector, count);<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> local->offset = off;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> local->count = count;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> local->flags = flags;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> local->iobref = iobref_ref (iobref);<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> <o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> ret = <span style="color:red">synctask_new(</span>this->ctx->env, du_get_du_info,du_writev_resume,frame,frame);<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> if(ret)<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> {<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> op_errno = -1;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> gf_log (this->name, GF_LOG_WARNING,"synctask_new return failure ret(%d) ",ret);<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> goto err;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> }<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> return 0;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">err:<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> op_errno = (op_errno == -1) ? errno : op_errno;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> DU_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"> return 0;<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">}<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">Br,<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US">Li Deqian<o:p></o:p></span></p>
</div>
</body>
</html>