Skip to content

Commit

Permalink
implemented PCRE based regexprs
Browse files Browse the repository at this point in the history
  • Loading branch information
cesar-douady committed Apr 13, 2024
1 parent 3ec8007 commit 80efcb3
Show file tree
Hide file tree
Showing 8 changed files with 203 additions and 54 deletions.
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ LINK_O := $(CXX) $(COVERAGE) -r
LINK_SO := $(CXX) $(COVERAGE) $(LINK_OPTS) -shared # some usage may have specific libs, avoid dependencies
LINK_BIN := $(CXX) $(COVERAGE) $(LINK_OPTS)
LINK_LIB := -ldl
# link against the 8-bit PCRE2 library only when HAS_PCRE is non-empty
ifneq ($(HAS_PCRE),)
LINK_LIB += -lpcre2-8
endif
#
ifeq ($(CXX_FLAVOR),clang)
WARNING_FLAGS += -Wno-misleading-indentation -Wno-unknown-warning-option -Wno-c2x-extensions -Wno-c++2b-extensions
Expand Down
16 changes: 16 additions & 0 deletions _bin/sys_config
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,18 @@ EOF
$CXX -o start_main -xc start_main.c
USE_LIBC_START_MAIN=$(./start_main)

#
# PCRE
#
# Probe : HAS_PCRE is set to 1 if a translation unit including <pcre2.h> (8-bit code units) compiles with $CXX, else to 0.
# Only compilation of the header is checked (-c), nothing is linked at this point.
cat <<"EOF" > pcre.cc
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
EOF
if $CXX -c -std=c++20 -o pcre.o pcre.cc 2>/dev/null
then HAS_PCRE=1
else HAS_PCRE=0
fi

cd $START_DIR

cat >$MK_FILE <<EOF
Expand All @@ -289,17 +301,21 @@ CXX_FLAVOR := $CXX_FLAVOR
LINK_LIB_PATH := $LINK_LIB_PATH
STD_INC_DIRS := $STD_INC_DIRS
#
HAS_PCRE := ${HAS_PCRE#0}
HAS_SECCOMP := ${HAS_SECCOMP#0}
HAS_SLURM := ${HAS_SLURM#0}
#
STD_PATH := $(env -i /bin/bash -c 'echo $PATH')
EOF

cat >$H_FILE <<EOF
#pragma once
#define ADDR2LINE "$ADDR2LINE"
#define HAS_CLOSE_RANGE $HAS_CLOSE_RANGE
#define HAS_LD_AUDIT $HAS_LD_AUDIT
#define HAS_MEMFD $HAS_MEMFD
#define HAS_OSTRINGSTREAM_VIEW $HAS_OSTRINGSTREAM_VIEW
#define HAS_PCRE $HAS_PCRE
#define HAS_PTRACE_GET_SYSCALL_INFO $HAS_PTRACE_GET_SYSCALL_INFO
#define HAS_SECCOMP $HAS_SECCOMP
#define HAS_SLURM $HAS_SLURM
Expand Down
22 changes: 14 additions & 8 deletions lmake_env/Lmakefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,19 +141,25 @@ class ConfigH(BaseRule) :
deps = { 'CONFIGURE' : 'ext/{DirS}configure' }
cmd = 'cd ext/{DirS} ; ./configure'

class SysConfig(PathRule) :
targets = {
class SysConfig(PathRule) : # XXX : handle PCRE
targets = {
'H' : 'sys_config.h'
, 'MK' : 'sys_config.mk'
, 'TRIAL' : 'trial/{*:.*}'
}
deps = { 'EXE' : '_bin/sys_config' }
cmd = 'CXX={gxx} PYTHON={sys.executable} ./{EXE} {MK} {H} 2>&1'
side_targets = {
'MK' : 'sys_config.mk'
}
deps = { 'EXE' : '_bin/sys_config' }
cmd = '''
CXX={gxx} PYTHON={sys.executable} ./{EXE} {MK} {H} 2>&1
echo '#undef HAS_PCRE' >> {H}
echo '#define HAS_PCRE 0' >> {H}
'''

class VersionH(BaseRule) :
target = 'version.hh'
deps = { 'EXE' : '_bin/version' }
cmd = "./{EXE} $(grep '\.cc$' Manifest) $(grep '\.hh$' Manifest)"
target = 'version.hh'
deps = { 'EXE' : '_bin/version' }
cmd = "./{EXE} $(grep '\.cc$' Manifest) $(grep '\.hh$' Manifest)"

opt_tab = {}
class GenOpts(BaseRule) :
Expand Down
23 changes: 12 additions & 11 deletions src/lmakeserver/req.cc
Original file line number Diff line number Diff line change
Expand Up @@ -501,9 +501,9 @@ namespace Engine {

void ReqData::_report_no_rule( Node node , NfsGuard& nfs_guard , DepDepth lvl ) {
::string name = node->name() ;
::vmap<RuleTgt,Rule::SimpleMatch> mrts ; // matching rules
RuleTgt art ; // set if an anti-rule matches
RuleIdx n_missing = 0 ; // number of rules missing deps
::vmap<RuleTgt,Rule::SimpleMatch> mrts ; // matching rules
RuleTgt art ; // set if an anti-rule matches
RuleIdx n_missing = 0 ; // number of rules missing deps
//
if (name.size()>g_config.path_max) {
audit_node( Color::Warning , "name is too long :" , node , lvl ) ;
Expand All @@ -520,31 +520,32 @@ namespace Engine {
return ;
}
//
for( RuleTgt rt : Node::s_rule_tgts(name).view() ) { // first pass to gather info : mrts : matching rules, n_missing : number of missing deps
for( RuleTgt rt : Node::s_rule_tgts(name).view() ) { // first pass to gather info : mrts : matching rules, n_missing : number of missing deps
if (!rt.pattern().match(name) ) continue ;
if (rt->special==Special::Anti) { art = rt ; break ; }
Rule::SimpleMatch m{rt,name} ;
mrts.emplace_back(rt,m) ;
if ( JobTgt jt{rt,name} ; +jt && jt->run_status!=RunStatus::MissingStatic ) continue ; // do not pass *this as req to avoid generating error message at cxtor time
if ( JobTgt jt{rt,name} ; +jt && jt->run_status!=RunStatus::MissingStatic ) goto Continue ; // do not pass *this as req to avoid generating error message at cxtor time
try { rt->deps_attrs.eval(m) ; }
catch (::pair_ss const&) { continue ; } // do not consider rule if deps cannot be computed
catch (::pair_ss const&) { goto Continue ; } // do not consider rule if deps cannot be computed
n_missing++ ;
Continue :
mrts.emplace_back(rt,::move(m)) ;
}
//
if ( !art && !mrts ) audit_node( Color::Err , "no rule match" , node , lvl ) ;
else audit_node( Color::Err , "no rule for" , node , lvl ) ;
if ( !art && is_target(nfs_guard.access(name)) ) audit_node( Color::Note , "consider : git add" , node , lvl+1 ) ;
//
for( auto const& [rt,m] : mrts ) { // second pass to do report
JobTgt jt { rt , name } ; // do not pass *this as req to avoid generating error message at cxtor time
for( auto const& [rt,m] : mrts ) { // second pass to do report
JobTgt jt { rt , name } ; // do not pass *this as req to avoid generating error message at cxtor time
::string reason ;
Node missing_dep ;
::vmap_s<DepSpec> static_deps ;
if ( +jt && jt->run_status!=RunStatus::MissingStatic ) { reason = "does not produce it" ; goto Report ; }
try { static_deps = rt->deps_attrs.eval(m) ; }
catch (::pair_ss const& msg_err) { reason = to_string("cannot compute its deps :\n",msg_err.first,msg_err.second) ; goto Report ; }
{ ::string missing_key ;
for( bool search_non_buildable : {true,false} ) // first search a non-buildable, if not found, search for non makable as deps have been made
for( bool search_non_buildable : {true,false} ) // first search a non-buildable, if not found, search for non makable as deps have been made
for( auto const& [k,dn] : static_deps ) {
Node d{dn.txt} ;
if ( search_non_buildable ? d->buildable>Buildable::No : d->status()<=NodeStatus::Makable ) continue ;
Expand All @@ -553,7 +554,7 @@ namespace Engine {
goto Found ;
}
Found :
SWEAR(+missing_dep) ; // else why wouldn't it apply ?!?
SWEAR(+missing_dep) ; // else why wouldn't it apply ?!?
::string mdn = missing_dep->name() ;
FileInfo fi { nfs_guard.access(mdn) } ;
reason = to_string( "misses static dep ", missing_key , (+fi?" (existing)":fi.tag()==FileTag::Dir?" (dir)":"") ) ;
Expand Down
2 changes: 1 addition & 1 deletion src/lmakeserver/rule.x.hh
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ namespace Engine {
// END_OF_VERSIONING

// not stored on disk
::vector<VarIdx> stem_mark_counts ;
::vector<size_t> stem_mark_counts ;
/**/ TargetPattern job_name_pattern ;
::vector<TargetPattern> patterns ;
Crc match_crc = Crc::None ;
Expand Down
2 changes: 0 additions & 2 deletions src/non_portable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
// This program is free software: you can redistribute/modify under the terms of the GPL-v3 (https://www.gnu.org/licenses/gpl-3.0.html).
// This program is distributed WITHOUT ANY WARRANTY, without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

#include "sys_config.h"

#include <elf.h> // NT_PRSTATUS definition on ARM
#include <sys/ptrace.h>

Expand Down
186 changes: 154 additions & 32 deletions src/re.hh
Original file line number Diff line number Diff line change
Expand Up @@ -7,44 +7,166 @@

#include "utils.hh"

#include <regex>
#if HAS_PCRE
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
#else
#include <regex>
#endif

namespace Re {

struct Match : ::smatch {
// cxtors & casts
using smatch::smatch ;
// accesses
::string_view operator[](size_t i) const {
::sub_match sm = smatch::operator[](i) ;
return {sm.first,sm.second} ;
struct Match ;
struct RegExpr ;

#if HAS_PCRE

inline uint8_t const* _cast_in (char const* p) { return reinterpret_cast<uint8_t const*>(p) ; }
inline uint8_t * _cast_in (char * p) { return reinterpret_cast<uint8_t *>(p) ; }

inline void swap( Match& a , Match& b ) ;
// Result of matching a subject against a RegExpr (PCRE2 flavor).
// Owns the PCRE2 match data as well as a private copy of the subject, so the views returned by operator[] stay valid as long as the Match is alive.
// Move-only : the user-declared move operations disable copy.
struct Match {
	friend struct RegExpr ;
	friend void swap( Match& a , Match& b ) ;
	// statics
private :
	static pcre2_match_data* _s_mk_data(RegExpr const& re) ; // does not touch *this, hence static so it can safely be called from mem-initializers
	// cxtors & casts
	Match() = default ;
	//
	Match( RegExpr const& re ) ;
	Match( RegExpr const& re , ::string const& s ) ;
public :
	~Match() {
		pcre2_match_data_free(_data) ;                        // PCRE2 documents this call as a no-op on a null pointer (default-constructed or moved-from Match)
	}
	//
	Match           (Match&& m) noexcept { swap(*this,m) ;                }
	Match& operator=(Match&& m) noexcept { swap(*this,m) ; return *this ; }
	// accesses
	// true if there is a match, i.e. if the first ovector entry is set ; a Match without data (moved-from) is reported as not matching
	bool operator+() const { return _data && pcre2_get_ovector_pointer(_data)[0]!=PCRE2_UNSET ; }
	bool operator!() const { return !+*this ; }
	//
	// return the text captured by group i (0 is the whole match)
	// an empty view is returned if there is no match, if i is out of range or if group i did not participate in the match
	::string_view operator[](size_t i) const {
		if ( !+*this                           ) return {} ;
		if ( i>=pcre2_get_ovector_count(_data) ) return {} ;
		PCRE2_SIZE const* v = pcre2_get_ovector_pointer(_data) ;
		if ( v[2*i]==PCRE2_UNSET               ) return {} ; // unset group : offsets are not valid positions in _subject
		return { _subject.data()+v[2*i] , v[2*i+1]-v[2*i] } ;
	}
	// data
private :
	pcre2_match_data* _data    = nullptr ;
	::string          _subject ;
} ;
// Exchange the content of 2 matches : both the PCRE2 match data and the owned subject.
inline void swap( Match& a , Match& b ) {
	a._subject.swap(b._subject) ;
	pcre2_match_data* a_data = a._data ;
	a._data = b._data ;
	b._data = a_data  ;
}

inline void swap( RegExpr& a , RegExpr& b ) ;
// Compiled regular expression (PCRE2 flavor).
// Owns the compiled code. Move-only : the user-declared move operations disable copy.
struct RegExpr {
	friend struct Match ;
	friend void swap( RegExpr& a , RegExpr& b ) ;
	static constexpr size_t ErrMsgSz = 120 ; // per PCRE doc
	// cxtors & casts
	RegExpr() = default ;
	// compile pattern so that it must match the whole subject (anchored at both ends) with '.' also matching newlines
	// fast and no_groups are accepted for interface compatibility with the <regex> flavor and are ignored
	// throw a ::string holding the PCRE2 error message if pattern cannot be compiled
	RegExpr( ::string const& pattern , bool /*fast*/=false , bool /*no_groups*/=false ) {
		int        err     = 0 ;
		PCRE2_SIZE err_pos = 0 ;
		_code = pcre2_compile(
			_cast_in(pattern.c_str()) , pattern.size()
		,	PCRE2_ANCHORED | PCRE2_DOTALL | PCRE2_ENDANCHORED
		,	&err , &err_pos
		,	nullptr/*context*/
		) ;
		if (!_code) {
			char err_buf[ErrMsgSz] = {} ;                                    // zeroed so that a well formed (empty) string is thrown even if no message can be retrieved
			pcre2_get_error_message(err,_cast_in(err_buf),sizeof(err_buf)) ;
			throw ::string(err_buf) ;
		}
	}
	~RegExpr() {
		pcre2_code_free(_code) ;                                             // PCRE2 documents this call as a no-op on a null pointer
	}
	//
	RegExpr           (RegExpr&& re) noexcept { swap(*this,re) ;                }
	RegExpr& operator=(RegExpr&& re) noexcept { swap(*this,re) ; return *this ; }
	// services
	Match match    (::string const& subject) const { return { *this ,subject } ; }
	bool  can_match(::string const& subject) const { return +match(subject)    ; }
	// number of capture groups in the pattern, 0 if the information cannot be retrieved (e.g. no compiled code)
	size_t mark_count() const {
		uint32_t cnt = 0 ;                                                   // stays 0 if pcre2_pattern_info fails and leaves it untouched
		pcre2_pattern_info( _code , PCRE2_INFO_CAPTURECOUNT , &cnt ) ;
		return cnt ;
	}
	// data
private :
	pcre2_code* _code = nullptr ;
} ;
// Exchange the compiled code owned by 2 regular expressions.
inline void swap( RegExpr& a , RegExpr& b ) {
	pcre2_code* a_code = a._code ;
	a._code = b._code ;
	b._code = a_code  ;
}
//
bool operator+() const { return !empty() ; }
bool operator!() const { return !+*this ; }
} ;

struct RegExpr : ::regex {
static constexpr ::regex_constants::syntax_option_type None { 0 } ;
// cxtors & casts
RegExpr() = default ;
RegExpr( ::string const& pattern , bool fast=false , bool no_groups=false ) :
::regex{ pattern ,
/**/ ::regex::ECMAScript
| (fast ? ::regex::optimize : None )
| (no_groups ? ::regex::nosubs : None )
}
{}
// services
Match match(::string const& txt) const {
Match res ;
::regex_match(txt,res,*this) ;

// Allocate match data suitable for re and mark it as "no match" by unsetting the first ovector entry.
// If re holds no compiled code (default-constructed or moved-from), a minimal block with a single offset pair is allocated
// rather than handing a null pattern to pcre2_match_data_create_from_pattern.
inline pcre2_match_data* Match::_s_mk_data(RegExpr const& re) {
	pcre2_match_data* res =
		re._code ? pcre2_match_data_create_from_pattern( re._code       , nullptr/*context*/ )
		:          pcre2_match_data_create             ( 1/*ovecsize*/  , nullptr/*context*/ )
	;
	SWEAR(res!=nullptr) ;                                // allocation failure must not be silently dereferenced below
	SWEAR(pcre2_get_ovector_count(res)>0) ;
	pcre2_get_ovector_pointer(res)[0] = PCRE2_UNSET ;
	return res ;
}
bool can_match(::string const& txt) const {
Match m ;
return ::regex_match(txt,m,*this) ;

// Build a Match that holds match data for re but reports no match.
inline Match::Match( RegExpr const& re ) : _data{_s_mk_data(re)} {}
// Match s against re. s is copied into the Match so that captured views stay valid independently of the caller's string.
inline Match::Match( RegExpr const& re , ::string const& s ) : _data{_s_mk_data(re)} , _subject{s} {
	int rc = pcre2_match(
		re._code
	,	_cast_in(_subject.c_str()) , _subject.size() , 0/*start_offset*/
	,	0/*options*/
	,	_data
	,	nullptr/*context*/
	) ;
	// on failure (no match or error), explicitly restore the "no match" marker rather than relying on pcre2_match leaving the ovector untouched
	if (rc<0) pcre2_get_ovector_pointer(_data)[0] = PCRE2_UNSET ;
}
} ;

#else

// Result of matching a subject against a RegExpr (<regex> flavor).
// Captured groups are returned as views built from the sub-match iterators, so the matched string must outlive them.
struct Match : private ::smatch {
	friend class RegExpr ;
	// cxtors & casts
private :
	using smatch::smatch ;
	// accesses
public :
	bool operator+() const { return !smatch::empty() ; } // true if there is a match
	bool operator!() const { return  smatch::empty() ; }
	//
	// text captured by group i (0 is the whole match)
	::string_view operator[](size_t i) const {
		auto const& grp = smatch::operator[](i) ;
		return ::string_view( grp.first , grp.second ) ;
	}
} ;

struct RegExpr : private ::regex {
friend class Match ;
static constexpr ::regex_constants::syntax_option_type None { 0 } ;
// cxtors & casts
RegExpr() = default ;
RegExpr( ::string const& pattern , bool fast=false , bool no_groups=false ) :
::regex{ pattern ,
/**/ ::regex::ECMAScript
| (fast ? ::regex::optimize : None )
| (no_groups ? ::regex::nosubs : None )
}
{}
// services
Match match(::string const& txt) const {
Match res ;
::regex_match(txt,res,*this) ;
return res ;
}
bool can_match(::string const& txt) const {
return +match(txt) ;
}
size_t mark_count() const {
return ::regex::mark_count() ;
}
} ;

#endif

}
3 changes: 3 additions & 0 deletions src/xxhsum.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@

#include "hash.hh"

#include "re.hh"

using namespace Hash ;

int main( int argc , char* argv[] ) {

for( int i=1 ; i<argc ; i++ ) {
::cout << ::string(Crc(argv[i],Algo::Xxh)) ;
if (argc>2) ::cout <<' '<< argv[i] ;
Expand Down

0 comments on commit 80efcb3

Please sign in to comment.