From 8cdb1ad49d779505006ddd6d032a2e6cee2855f0 Mon Sep 17 00:00:00 2001 From: Cesar Douady Date: Thu, 2 May 2024 00:19:25 +0200 Subject: [PATCH] fix rare case of crash after check_deps or depend verbose --- src/lmakeserver/backend.cc | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/lmakeserver/backend.cc b/src/lmakeserver/backend.cc index 247dfab1..ca73748a 100644 --- a/src/lmakeserver/backend.cc +++ b/src/lmakeserver/backend.cc @@ -17,11 +17,18 @@ namespace Backends { void send_reply( JobIdx job , JobMngtRpcReply&& jmrr ) { Lock lock { Backend::_s_mutex } ; auto it = Backend::_s_start_tab.find(job) ; - if (it==Backend::_s_start_tab.end()) return ; // job is dead without waiting for reply, curious but possible - Backend::StartEntry::Conn const& conn = it->second.conn ; - ClientSockFd fd ( conn.host , conn.port ) ; - jmrr.seq_id = conn.seq_id ; - OMsgBuf().send( fd , jmrr ) ; // XXX : straighten out Fd : Fd must not detach on mv and Epoll must take an AutoCloseFd as arg to take close resp. + if (it==Backend::_s_start_tab.end()) return ; // job is dead without waiting for reply, curious but possible + Backend::StartEntry const& e = it->second ; + try { + jmrr.seq_id = e.conn.seq_id ; + ClientSockFd fd( e.conn.host , e.conn.port , 3/*n_trials*/ ) ; + OMsgBuf().send( fd , jmrr ) ; // XXX : straighten out Fd : Fd must not detach on mv and Epoll must take an AutoCloseFd as arg to take close resp. + } catch (...) { // if we cannot connect to job, assume it is dead while we processed the request + Backend::_s_deferred_wakeup_thread->emplace_after( + g_config.network_delay + , Backend::DeferredEntry { e.conn.seq_id , JobExec(Job(job),e.conn.host,e.start_date,New) } + ) ; + } } // @@ -424,7 +431,7 @@ namespace Backends { DF} Job job { jmrr.job } ; Trace trace(BeChnl,"_s_handle_job_mngt",jmrr) ; - { Lock lock { _s_mutex } ; // prevent sub-backend from manipulating _s_start_tab from main thread, lock for minimal time + { Lock lock { _s_mutex } ; // prevent sub-backend from manipulating _s_start_tab from main thread, lock for minimal time // keep_fd auto it = _s_start_tab.find(+job) ; if (it==_s_start_tab.end() ) { trace("not_in_tab" ) ; return false ; } StartEntry& entry = it->second ; if (entry.conn.seq_id!=jmrr.seq_id) { trace("bad_seq_id",entry.conn.seq_id,jmrr.seq_id) ; return false ; } @@ -642,7 +649,7 @@ namespace Backends { be->addr = ServerSockFd::s_addr(ifce) ; } try { be->config(cfg.dct,dynamic) ; be->config_err.clear() ; trace("ready",t ) ; } - catch (::string const& e) { SWEAR(+e) ; be->config_err = e ; trace("err" ,t,e) ; } // empty config_err means ready + catch (::string const& e) { SWEAR(+e) ; be->config_err = e ; trace("err" ,t,e) ; } // empty config_err means ready } job_start_thread.wait_started() ; job_mngt_thread .wait_started() ;