From 7416aa1ed57af8d8d073735fcc9c837d62e0bf1a Mon Sep 17 00:00:00 2001 From: Leslie Qi Wang Date: Mon, 30 Sep 2024 22:16:58 +0000 Subject: [PATCH] socket should be closed when socket error happened during abort FD leak issue happened from time to time at our production environment. Using strace, I found many EAGAIN error on read/write when issue occured. This PR should fix some cases, will watch after new fix is deployed Example `strace` logs: ``` read(1727, 0xc002a2e000, 517) = -1 EAGAIN (Resource temporarily unavailable) read(1727, 0xc0001e3c00, 1772) = -1 EAGAIN (Resource temporarily unavailable) write(1770, "\223\303tH\t\246\5\261p\224\0376\272\302J\27\205\217\2412\206\220\267\5\22CO\315\213\331\313P"..., 5132) = -1 EAGAIN (Resource temporarily unavailable) write(1770, "\340,M\273\324K\27V\17\222\232-\332j\204\3618\313R\221\35\16[\267\316\304,\360r\232?m"..., 12899) = -1 EAGAIN (Resource temporarily unavailable) write(1770, "_\20173\200\334W]aT\227\233\362LO^56\270\21\325\267%\361^\264O@\376\315\365Z"..., 5279) = -1 EAGAIN (Resource temporarily unavailable) write(1770, "J\261\236\nK\325\354\231\312\rF'M\6\222\"v\247\317L\346tE\354Z*\251/?\254\357\214"..., 15977) = -1 EAGAIN (Resource temporarily unavailable) write(1770, "\371\345\221\352\10\232Pa\204\253\306\\lm\271\231Y\305\314\216U\244\314\247\36\204\300\256\231\217\2727"..., 7331) = -1 EAGAIN (Resource temporarily unavailable) write(1770, "=aFp\207\367 lJ\245\351\37\313\357\232f\333\232\344D\361\266D\223\220\312\345r\321\10J}"..., 13193) = -1 EAGAIN (Resource temporarily unavailable) write(1770, "\315\10`\370\352\243K\314\265\270\3\260A6n\221\230Y\212V-Xp\325\231\236\350\32\207=A\25"..., 4547) = -1 EAGAIN (Resource temporarily unavailable) write(1770, "\300\206\365\313\325{:\0052hn:\270\205\30TC\23\203\237\360\273\216\242\353\373Y)\233\211\305B"..., 8504) = -1 EAGAIN (Resource temporarily unavailable) read(1727, 0xc0029e18c0, 517) = -1 EAGAIN (Resource temporarily unavailable) write(1770, "\354\243\206\270'X\\\344\323\205\247gXa\254\335a\345|\\g\232Z\307\375r\314\33\207\31l\237"..., 1910) = -1 EAGAIN (Resource temporarily unavailable) read(1727, 0xc001f8ad80, 517) = -1 EAGAIN (Resource temporarily unavailable) write(1770, "\246|\230}S\24\201\202\253\242\37\216\2121t\3\366=\345#\253\342^\335?\355\243Q\254\nI\245"..., 9677) = -1 EAGAIN (Resource temporarily unavailable) write(1770, "z^f\226\372\353\25\24\214\302\331\253\222\310|\330l\364\201n\231\275Oi$[\30\273b\244\376&"..., 12608) = -1 EAGAIN (Resource temporarily unavailable) write(1770, "\263\326{h\204V\345\254\223\320\n\214\301\325;~\276\367\351\202\10\212\241\343\33\262\343\263\272\230\312\361"..., 6014) = -1 EAGAIN (Resource temporarily unavailable) write(1770, "Qe\315\302\316\t\204\375\212RV\35u_\267\347N\361Xo\3\254cP{)\n\276\357.J\351"..., 11876) = -1 EAGAIN (Resource temporarily unavailable) write(1770, "\250E\244\276\35\204\n\34\376\0-\251/\22\10\327\310\332Y\6\204P\6\307NF\3K\363\316\351\311"..., 4256) = -1 EAGAIN (Resource temporarily unavailable) futex(0x220fb50, FUTEX_WAIT_PRIVATE, 0, NULL) = -1 EAGAIN (Resource temporarily unavailable) futex(0x220fb50, FUTEX_WAIT_PRIVATE, 0, NULL) = -1 EAGAIN (Resource temporarily unavailable) ``` Signed-off-by: Leslie Qi Wang --- receiver.go | 8 ++++++-- sender.go | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/receiver.go b/receiver.go index 9a25e99..7244567 100644 --- a/receiver.go +++ b/receiver.go @@ -258,6 +258,12 @@ func (r *receiver) abort(err error) error { if r.conn == nil { return nil } + + defer func() { + r.conn.close() + r.conn = nil + }() + if r.hook != nil { r.hook.OnFailure(r.buildTransferStats(), err) } @@ -266,7 +272,5 @@ func (r *receiver) abort(err error) error { if err != nil { return err } - r.conn.close() - r.conn = nil return nil } diff --git a/sender.go b/sender.go index de9d673..1fd50db 100644 --- a/sender.go +++ b/sender.go @@ -278,6 +278,12 @@ func (s *sender) abort(err error) error { if s.conn == nil { return nil } + + defer func() { + s.conn.close() + s.conn = nil + }() + if s.hook != nil { s.hook.OnFailure(s.buildTransferStats(), err) } @@ -286,7 +292,5 @@ func (s *sender) abort(err error) error { if err != nil { return err } - s.conn.close() - s.conn = nil return nil }