Skip to content

Commit

Permalink
implemented dynamic interpreter + fixed import when in a subdir + fix…
Browse files Browse the repository at this point in the history
…ed cases of job_exec and server hanging + protect against chdir in dynamic attributes
  • Loading branch information
cesar-douady committed Apr 9, 2024
1 parent 757fec3 commit d66771b
Show file tree
Hide file tree
Showing 28 changed files with 408 additions and 326 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -646,14 +646,14 @@ UNIT_TESTS : UNIT_TESTS1 UNIT_TESTS2
@( cd $(@D) ; git clean -ffdxq >/dev/null 2>/dev/null ) ; : # keep $(@D) to ease debugging, ignore rc as old versions of git work but generate an error
@for f in $$(grep '^$(UT_DIR)/base/' Manifest) ; do df=$(@D)/$${f#$(UT_DIR)/base/} ; mkdir -p $$(dirname $$df) ; cp $$f $$df ; done
@cd $(@D) ; find . -type f -printf '%P\n' > Manifest
@( cd $(@D) ; PATH=$(ROOT_DIR)/bin:$(ROOT_DIR)/_bin:$$PATH $(ROOT_DIR)/$< ) >$@ 2>$@.err || ( cat $@ $@.err ; mv $@ $@.out ; exit 1 )
@( cd $(@D) ; PATH=$(ROOT_DIR)/bin:$(ROOT_DIR)/_bin:$$PATH $(ROOT_DIR)/$< ) >$@.out 2>$@.err && mv $@.out $@ || ( cat $@ $@.err ; exit 1 )

%.dir/tok : %.py $(LMAKE_FILES) _lib/ut.py
@echo py test to $@
@mkdir -p $(@D)
@( cd $(@D) ; git clean -ffdxq >/dev/null 2>/dev/null ) ; : # keep $(@D) to ease debugging, ignore rc as old versions of git work but generate an error
@cp $< $(@D)/Lmakefile.py
@( cd $(@D) ; PATH=$(ROOT_DIR)/bin:$(ROOT_DIR)/_bin:$$PATH PYTHONPATH=$(ROOT_DIR)/lib:$(ROOT_DIR)/_lib HOME= $(PYTHON) Lmakefile.py ) >$@ 2>$@.err || ( cat $@ $@.err ; mv $@ $@.out ; exit 1 )
@( cd $(@D) ; PATH=$(ROOT_DIR)/bin:$(ROOT_DIR)/_bin:$$PATH PYTHONPATH=$(ROOT_DIR)/lib:$(ROOT_DIR)/_lib HOME= $(PYTHON) Lmakefile.py ) >$@.out 2>$@.err && mv $@.out $@ || ( cat $@ $@.err ; exit 1 )

#
# lmake env
Expand Down
16 changes: 7 additions & 9 deletions TO_DO
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ items :
* BUGS (implemented but does not work)
****************************************************************************************************

* fix compilation with LMAKE_SAN=T
* fix compilation with LMAKE_FLAGS=ST
* missing some deps when reading elf
- it seems that libc.so is missing at least in some occasions
* reimplement hierarchical repositories
Expand All @@ -28,8 +28,6 @@ items :
* LACK (not implemented but necessary for lmake semantic)
****************************************************************************************************

* improve lshow -i
- generate info on nodes
* manage 32 bits executables
- compile ld_audit.so (and co) in both 32 & 64 bits
- put adequate $PLATFORM in LD_PRELOAD
Expand All @@ -53,6 +51,10 @@ items :
* COSMETIC (ugly as long as not implemented)
****************************************************************************************************

* report "killed while waiting for stdout or stderr"
- when a job is killed while the child is terminated
- so as to indicate to the user that something weird was going on
- could also report this time in lshow -i
* generate an error when calling depend on a target
- either known at that time
- or when it becomes a target if later
Expand All @@ -78,8 +80,9 @@ items :
- such requests must go through the engine_loop
- this may take a while
- this means during this time, all jobs may connect (and actually do)
- requiring a coket for each slurm slot
- requiring a socket for each slurm slot
- defeating the purpose of disconnecting jobs during execution
- in all cases, code must be robust to an error in accept
* fix store to be compliant with strict aliasing rules
* support 64-bits id
- configure with NBits rather than types
Expand Down Expand Up @@ -110,11 +113,6 @@ items :
* FEATURES (not implemented and can work without)
****************************************************************************************************

* implement a lshow -r to see what is running now and -B to see BOM (list of sources)
- implement a generic walk through deps
- use it for check_deps, which will prevent jobs post critical deps to be run
* support dynamic values for interpreter
- python and shell
* improve job isolation by using namespaces
- much like faketree : https://github.com/enfabrica/enkit/tree/master/faketree
- generalize tmp mapping
Expand Down
21 changes: 12 additions & 9 deletions _bin/sys_config
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,20 @@ CXX=${CXX:-g++}
type -p $CXX >/dev/null || { echo cannot find c++ compiler $CXX ; exit 1 ; }

case "$($CXX --version|head -1)" in
*clang* ) CXX_FLAVOR=clang ; [ "$($CXX -dumpversion)" -lt 15 ] && { echo clang version must be at least 15 ; exit 1 ; } ;;
*g++* ) CXX_FLAVOR=gcc ; [ "$($CXX -dumpversion)" -lt 11 ] && { echo gcc version must be at least 11 ; exit 1 ; } ;;
* ) echo cannot recognize c++ compiler $CXX ; exit 1 ;;
*clang* ) CXX_FLAVOR=clang ; v=$($CXX -dumpversion) ; [ ${v%%.*} -lt 15 ] && { echo clang version must be at least 15 ; exit 1 ; } ;;
*g++* ) CXX_FLAVOR=gcc ; v=$($CXX -dumpversion) ; [ ${v%%.*} -lt 11 ] && { echo gcc version must be at least 11 ; exit 1 ; } ;;
* ) echo cannot recognize c++ compiler $CXX ; exit 1 ;;
esac

LLP="$($CXX -v -E /dev/null 2>&1 | grep LIBRARY_PATH=)" # e.g. : LIBARY_PATH=/usr/lib/x:/a/b:/c:/a/b/c/..
LLP="$(echo $LLP | sed 's/LIBRARY_PATH=//' )" # e.g. : /usr/lib/x:/a/b:/c:/a/b/c/..
LLP="$(echo $LLP | sed 's/:/ /' )" # e.g. : /usr/lib/x /a/b /c /a/b/c/..
LLP="$(echo $LLP | sort -u )" # e.g. : /usr/lib/x /a/b /c /a/b
LINK_LIB_PATH= # e.g. : /a/b /c /usr/lib/x
for l in $LLP ; do # e.g. : /a/b /c (suppress standard dirs as required in case of installed package)
# XXX : do the equivalent probe with clang
if [ $CXX_FLAVOR = gcc ] ; then
LLP="$($CXX -v -E /dev/null 2>&1 | grep LIBRARY_PATH=)" # e.g. : LIBRARY_PATH=/usr/lib/x:/a/b:/c:/a/b/c/..
fi
LLP="$(echo $LLP | sed 's/LIBRARY_PATH=//' )" # e.g. : /usr/lib/x:/a/b:/c:/a/b/c/..
LLP="$(echo $LLP | sed 's/:/ /' )" # e.g. : /usr/lib/x /a/b /c /a/b/c/..
LLP="$(echo $LLP | sort -u )" # e.g. : /usr/lib/x /a/b /c /a/b
LINK_LIB_PATH= # e.g. : /a/b /c /usr/lib/x
for l in $LLP ; do # e.g. : /a/b /c (suppress standard dirs as required in case of installed package)
case $l/ in
/usr/lib/* ) ;;
/usr/lib64/*) ;;
Expand Down
9 changes: 5 additions & 4 deletions _lib/lmake/rules.src.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@
import lmake
from . import has_ld_audit,pdict,root_dir # if not in an lmake repo, root_dir is not set to current dir

shell = '$BASH' # .
python = _sys.executable

_std_path = '$STD_PATH' # substituted at installation time
_bash = '$BASH' # .
_ld_library_path = '$LD_LIBRARY_PATH' # .
_lmake_dir = __file__.rsplit('/lib/',1)[0]
_python = _sys.executable

_rules = lmake._rules # list of rules that must be filled in by user code

Expand Down Expand Up @@ -87,8 +88,8 @@ class Rule(_RuleBase) :
n_retries = 1 # number of retries in case of job lost. 1 is a reasonable value
# n_tokens = 1 # number of jobs likely to run in parallel for this rule (used for ETA estimation)
# prio = 0 # in case of ambiguity, rules are selected with highest prio first
python = (_python,) # python used for callable cmd
shell = (_bash ,) # shell used for str cmd (_sh is usually /bin/sh which may test for dir existence before chdir, which defeats auto_mkdir)
python = (python,) # python used for callable cmd
shell = (shell ,) # shell used for str cmd (_sh is usually /bin/sh which may test for dir existence before chdir, which defeats auto_mkdir)
start_delay = 3 # delay before sending a start message if job is not done by then, 3 is a reasonable compromise
max_stderr_len = 100 # maximum number of stderr lines shown in output (full content is accessible with lshow -e), 100 is a reasonable compromise
# timeout = None # timeout allocated to job execution (in s), must be None or an int
Expand Down
39 changes: 20 additions & 19 deletions _lib/read_makefiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,9 @@ def handle_inheritance(rule) :
typ,dyn = StdAttrs[k]
if typ and not ( dyn and callable(v) ) :
try :
if typ in (tuple,list) and not isinstance(v,(tuple,list)) : v = typ((v,))
else : v = typ( v )
if callable(v) : pass
elif typ in (tuple,list) and not isinstance(v,(tuple,list)) : v = typ((v,))
else : v = typ( v )
except :
raise TypeError(f'bad format for {k} : cannot be converted to {typ.__name__}')
attrs[k] = v
Expand Down Expand Up @@ -393,8 +394,6 @@ def prepare_jobs(self) :
, *( k for k in self.rule_rep.matches.keys() if k.isidentifier() )
}
#
if self.attrs.is_python : self.rule_rep.interpreter = self.attrs.python
else : self.rule_rep.interpreter = self.attrs.shell
for attr in ('cwd','ete','force','max_submit_count','n_tokens') :
if attr in self.attrs : self.rule_rep[attr] = self.attrs[attr]

Expand Down Expand Up @@ -437,31 +436,33 @@ def handle_submit_none(self) :
self.rule_rep.submit_none_attrs = self._finalize()

def handle_start_cmd(self) :
if self.attrs.is_python : interpreter = 'python'
else : interpreter = 'shell'
self._init()
self._handle_val('auto_mkdir' )
self._handle_val('env' ,'environ_cmd')
self._handle_val('ignore_stat' )
self._handle_val('chroot' )
self._handle_val('interpreter' )
self._handle_val('tmp' )
self._handle_val('use_script' )
self._handle_val('auto_mkdir' )
self._handle_val('env' ,rep_key='environ_cmd')
self._handle_val('ignore_stat' )
self._handle_val('chroot' )
self._handle_val('interpreter',rep_key=interpreter )
self._handle_val('tmp' )
self._handle_val('use_script' )
self.rule_rep.start_cmd_attrs = self._finalize()

def handle_start_rsrcs(self) :
self._init()
self._handle_val('autodep' )
self._handle_val('env' ,'environ_resources')
self._handle_val('timeout' )
self._handle_val('autodep' )
self._handle_val('env' ,rep_key='environ_resources')
self._handle_val('timeout' )
self.rule_rep.start_rsrcs_attrs = self._finalize()

def handle_start_none(self) :
if not callable(self.attrs.kill_sigs) : self.attrs.kill_sigs = [int(x) for x in self.attrs.kill_sigs]
self._init()
self._handle_val('keep_tmp' )
self._handle_val('start_delay' )
self._handle_val('kill_sigs' )
self._handle_val('n_retries' )
self._handle_val('env' ,'environ_ancillary')
self._handle_val('keep_tmp' )
self._handle_val('start_delay' )
self._handle_val('kill_sigs' )
self._handle_val('n_retries' )
self._handle_val('env' ,rep_key='environ_ancillary')
self.rule_rep.start_none_attrs = self._finalize()

def handle_end_cmd(self) :
Expand Down
4 changes: 2 additions & 2 deletions doc/lmake.texi
Original file line number Diff line number Diff line change
Expand Up @@ -2131,7 +2131,7 @@ This is typically used to access some environment variables set by @code{slurm}.
@item Default
@tab system Python
@item Dynamic
@tab False
@tab Yes. Environment includes stems, targets, deps and resources.
@end multitable

This attribute defines the interpreter used to run the @code{cmd} if it is a @code{function}.
Expand All @@ -2151,7 +2151,7 @@ In particular, Python2.7 and all revisions of Python3 are fully supported.
@item Default
@tab @code{/bin/bash}
@item Dynamic
@tab False
@tab Yes. Environment includes stems, targets, deps and resources.
@end multitable

This attribute defines the interpreter used to run the @code{cmd} if it is a @code{str}.
Expand Down
6 changes: 3 additions & 3 deletions src/autodep/clmake.cc
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ static PyObject* get_autodep( PyObject* /*null*/ , PyObject* args , PyObject* kw
if (n_args>0) return py_err_set(Exception::TypeErr,"expected no args" ) ;
char c = 0/*garbage*/ ;
// we have a private Record with a private AutodepEnv, so we must go through the backdoor to alter the regular AutodepEnv
int rc [[maybe_unused]] = ::readlink( (PrivateAdminDir+"/backdoor/autodep"s ).c_str() , &c , 1 ) ;
int rc [[maybe_unused]] = ::readlinkat( Record::s_root_fd() , (PrivateAdminDir+"/backdoor/autodep"s ).c_str() , &c , 1 ) ;
SWEAR( c=='0' || c=='1' , int(c) ) ;
SWEAR( rc==1 , rc ) ;
return Ptr<Bool>(c!='0')->to_py_boost() ;
Expand All @@ -262,8 +262,8 @@ static PyObject* set_autodep( PyObject* /*null*/ , PyObject* args , PyObject* kw
char c ;
// we have a private Record with a private AutodepEnv, so we must go through the backdoor to alter the regular AutodepEnv
int rc [[maybe_unused]] ; // avoid compiler warning
if (+py_args[0]) rc = ::readlink( (PrivateAdminDir+"/backdoor/enable"s ).c_str() , &c , 1 ) ;
else rc = ::readlink( (PrivateAdminDir+"/backdoor/disable"s).c_str() , &c , 1 ) ; // note that the depend and target functions are still working while disabled
if (+py_args[0]) rc = ::readlinkat( Record::s_root_fd() , (PrivateAdminDir+"/backdoor/enable"s ).c_str() , &c , 1 ) ;
else rc = ::readlinkat( Record::s_root_fd() , (PrivateAdminDir+"/backdoor/disable"s).c_str() , &c , 1 ) ; // note that the depend and target functions are still working while disabled
return None.to_py_boost() ;
}

Expand Down
Loading

0 comments on commit d66771b

Please sign in to comment.