From 80efcb3933065b9949e9a8e55aa5cac387b67d7c Mon Sep 17 00:00:00 2001 From: Cesar Douady Date: Sat, 13 Apr 2024 15:02:01 +0200 Subject: [PATCH] implemented PCRE based regexprs --- Makefile | 3 + _bin/sys_config | 16 ++++ lmake_env/Lmakefile.py | 22 +++-- src/lmakeserver/req.cc | 23 ++--- src/lmakeserver/rule.x.hh | 2 +- src/non_portable.cc | 2 - src/re.hh | 186 +++++++++++++++++++++++++++++++------- src/xxhsum.cc | 3 + 8 files changed, 203 insertions(+), 54 deletions(-) diff --git a/Makefile b/Makefile index af4baa2d..00429edd 100644 --- a/Makefile +++ b/Makefile @@ -53,6 +53,9 @@ LINK_O := $(CXX) $(COVERAGE) -r LINK_SO := $(CXX) $(COVERAGE) $(LINK_OPTS) -shared # some usage may have specific libs, avoid dependencies LINK_BIN := $(CXX) $(COVERAGE) $(LINK_OPTS) LINK_LIB := -ldl +ifneq ($(HAS_PCRE),) + LINK_LIB += -lpcre2-8 +endif # ifeq ($(CXX_FLAVOR),clang) WARNING_FLAGS += -Wno-misleading-indentation -Wno-unknown-warning-option -Wno-c2x-extensions -Wno-c++2b-extensions diff --git a/_bin/sys_config b/_bin/sys_config index cad6c0fe..6a304600 100755 --- a/_bin/sys_config +++ b/_bin/sys_config @@ -267,6 +267,18 @@ EOF $CXX -o start_main -xc start_main.c USE_LIBC_START_MAIN=$(./start_main) +# +# PCRE +# +cat <<"EOF" > pcre.cc + #define PCRE2_CODE_UNIT_WIDTH 8 + #include +EOF +if $CXX -c -std=c++20 -o pcre.o pcre.cc 2>/dev/null +then HAS_PCRE=1 +else HAS_PCRE=0 +fi + cd $START_DIR cat >$MK_FILE <$H_FILE <&1 + echo '#undef HAS_PCRE' >> {H} + echo '#define HAS_PCRE 0' >> {H} + ''' class VersionH(BaseRule) : - target = 'version.hh' - deps = { 'EXE' : '_bin/version' } - cmd = "./{EXE} $(grep '\.cc$' Manifest) $(grep '\.hh$' Manifest)" + target = 'version.hh' + deps = { 'EXE' : '_bin/version' } + cmd = "./{EXE} $(grep '\.cc$' Manifest) $(grep '\.hh$' Manifest)" opt_tab = {} class GenOpts(BaseRule) : diff --git a/src/lmakeserver/req.cc b/src/lmakeserver/req.cc index c15cfe46..614eba72 100644 --- a/src/lmakeserver/req.cc +++ b/src/lmakeserver/req.cc @@ -501,9 +501,9 @@ namespace Engine { void ReqData::_report_no_rule( Node node , NfsGuard& nfs_guard , DepDepth lvl ) { ::string name = node->name() ; - ::vmap mrts ; // matching rules - RuleTgt art ; // set if an anti-rule matches - RuleIdx n_missing = 0 ; // number of rules missing deps + ::vmap mrts ; // matching rules + RuleTgt art ; // set if an anti-rule matches + RuleIdx n_missing = 0 ; // number of rules missing deps // if (name.size()>g_config.path_max) { audit_node( Color::Warning , "name is too long :" , node , lvl ) ; @@ -520,23 +520,24 @@ namespace Engine { return ; } // - for( RuleTgt rt : Node::s_rule_tgts(name).view() ) { // first pass to gather info : mrts : matching rules, n_missing : number of missing deps + for( RuleTgt rt : Node::s_rule_tgts(name).view() ) { // first pass to gather info : mrts : matching rules, n_missing : number of missing deps if (!rt.pattern().match(name) ) continue ; if (rt->special==Special::Anti) { art = rt ; break ; } Rule::SimpleMatch m{rt,name} ; - mrts.emplace_back(rt,m) ; - if ( JobTgt jt{rt,name} ; +jt && jt->run_status!=RunStatus::MissingStatic ) continue ; // do not pass *this as req to avoid generating error message at cxtor time + if ( JobTgt jt{rt,name} ; +jt && jt->run_status!=RunStatus::MissingStatic ) goto Continue ; // do not pass *this as req to avoid generating error message at cxtor time try { rt->deps_attrs.eval(m) ; } - catch (::pair_ss const&) { continue ; } // do not consider rule if deps cannot be computed + catch (::pair_ss const&) { goto Continue ; } // do not consider rule if deps cannot be computed n_missing++ ; + Continue : + mrts.emplace_back(rt,::move(m)) ; } // if ( !art && !mrts ) audit_node( Color::Err , "no rule match" , node , lvl ) ; else audit_node( Color::Err , "no rule for" , node , lvl ) ; if ( !art && is_target(nfs_guard.access(name)) ) audit_node( Color::Note , "consider : git add" , node , lvl+1 ) ; // - for( auto const& [rt,m] : mrts ) { // second pass to do report - JobTgt jt { rt , name } ; // do not pass *this as req to avoid generating error message at cxtor time + for( auto const& [rt,m] : mrts ) { // second pass to do report + JobTgt jt { rt , name } ; // do not pass *this as req to avoid generating error message at cxtor time ::string reason ; Node missing_dep ; ::vmap_s static_deps ; @@ -544,7 +545,7 @@ namespace Engine { try { static_deps = rt->deps_attrs.eval(m) ; } catch (::pair_ss const& msg_err) { reason = to_string("cannot compute its deps :\n",msg_err.first,msg_err.second) ; goto Report ; } { ::string missing_key ; - for( bool search_non_buildable : {true,false} ) // first search a non-buildable, if not found, search for non makable as deps have been made + for( bool search_non_buildable : {true,false} ) // first search a non-buildable, if not found, search for non makable as deps have been made for( auto const& [k,dn] : static_deps ) { Node d{dn.txt} ; if ( search_non_buildable ? d->buildable>Buildable::No : d->status()<=NodeStatus::Makable ) continue ; @@ -553,7 +554,7 @@ namespace Engine { goto Found ; } Found : - SWEAR(+missing_dep) ; // else why wouldn't it apply ?!? + SWEAR(+missing_dep) ; // else why wouldn't it apply ?!? ::string mdn = missing_dep->name() ; FileInfo fi { nfs_guard.access(mdn) } ; reason = to_string( "misses static dep ", missing_key , (+fi?" (existing)":fi.tag()==FileTag::Dir?" (dir)":"") ) ; diff --git a/src/lmakeserver/rule.x.hh b/src/lmakeserver/rule.x.hh index 216893a7..35b1d090 100644 --- a/src/lmakeserver/rule.x.hh +++ b/src/lmakeserver/rule.x.hh @@ -622,7 +622,7 @@ namespace Engine { // END_OF_VERSIONING // not stored on disk - ::vector stem_mark_counts ; + ::vector stem_mark_counts ; /**/ TargetPattern job_name_pattern ; ::vector patterns ; Crc match_crc = Crc::None ; diff --git a/src/non_portable.cc b/src/non_portable.cc index 1c468452..3df6e6bb 100644 --- a/src/non_portable.cc +++ b/src/non_portable.cc @@ -3,8 +3,6 @@ // This program is free software: you can redistribute/modify under the terms of the GPL-v3 (https://www.gnu.org/licenses/gpl-3.0.html). // This program is distributed WITHOUT ANY WARRANTY, without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -#include "sys_config.h" - #include // NT_PRSTATUS definition on ARM #include diff --git a/src/re.hh b/src/re.hh index 785218e8..041e95f9 100644 --- a/src/re.hh +++ b/src/re.hh @@ -7,44 +7,166 @@ #include "utils.hh" -#include +#if HAS_PCRE + #define PCRE2_CODE_UNIT_WIDTH 8 + #include +#else + #include +#endif namespace Re { - struct Match : ::smatch { - // cxtors & casts - using smatch::smatch ; - // accesses - ::string_view operator[](size_t i) const { - ::sub_match sm = smatch::operator[](i) ; - return {sm.first,sm.second} ; + struct Match ; + struct RegExpr ; + + #if HAS_PCRE + + inline uint8_t const* _cast_in (char const* p) { return reinterpret_cast(p) ; } + inline uint8_t * _cast_in (char * p) { return reinterpret_cast(p) ; } + + inline void swap( Match& a , Match& b ) ; + struct Match { + friend class RegExpr ; + friend void swap( Match& a , Match& b ) ; + // statics + private : + pcre2_match_data* _s_mk_data(RegExpr const& re) ; + // cxtors & casts + Match() = default ; + // + Match( RegExpr const& re ) ; + Match( RegExpr const& re , ::string const& s ) ; + public : + ~Match() { + pcre2_match_data_free(_data) ; + } + // + Match (Match&& m) { swap(*this,m) ; } + Match& operator=(Match&& m) { swap(*this,m) ; return *this ; } + // accesses + bool operator+() const { return pcre2_get_ovector_pointer(_data)[0]!=PCRE2_UNSET ; } + bool operator!() const { return !+*this ; } + // + ::string_view operator[](size_t i) const { + PCRE2_SIZE const* v = pcre2_get_ovector_pointer(_data) ; + return { _subject.data()+v[2*i] , v[2*i+1]-v[2*i] } ; + } + // data + private : + pcre2_match_data* _data = nullptr ; + ::string _subject ; + } ; + inline void swap( Match& a , Match& b ) { + ::swap(a._data ,b._data ) ; + ::swap(a._subject,b._subject) ; + } + + inline void swap( RegExpr& a , RegExpr& b ) ; + struct RegExpr { + friend class Match ; + friend void swap( RegExpr& a , RegExpr& b ) ; + static constexpr size_t ErrMsgSz = 120 ; // per PCRE doc + // cxtors & casts + RegExpr() = default ; + RegExpr( ::string const& pattern , bool /*fast*/=false , bool /*no_groups*/=false ) { + int err = 0 ; + PCRE2_SIZE err_pos = 0 ; + _code = pcre2_compile( + _cast_in(pattern.c_str()) , pattern.size() + , PCRE2_ANCHORED | PCRE2_DOTALL | PCRE2_ENDANCHORED + , &err , &err_pos + , nullptr/*context*/ + ) ; + if (!_code) { + char err_buf[ErrMsgSz] ; + pcre2_get_error_message(err,_cast_in(err_buf),sizeof(err_buf)) ; + throw ::string(err_buf) ; + } + } + ~RegExpr() { + pcre2_code_free(_code) ; + } + // + RegExpr (RegExpr&& re) { swap(*this,re) ; } + RegExpr& operator=(RegExpr&& re) { swap(*this,re) ; return *this ; } + // services + Match match (::string const& subject) const { return { *this ,subject } ; } + bool can_match(::string const& subject) const { return +match(subject) ; } + size_t mark_count() const { + uint32_t cnt ; + pcre2_pattern_info( _code , PCRE2_INFO_CAPTURECOUNT , &cnt ) ; + return cnt ; + } + // data + private : + pcre2_code* _code = nullptr ; + } ; + inline void swap( RegExpr& a , RegExpr& b ) { + ::swap(a._code,b._code) ; } - // - bool operator+() const { return !empty() ; } - bool operator!() const { return !+*this ; } - } ; - - struct RegExpr : ::regex { - static constexpr ::regex_constants::syntax_option_type None { 0 } ; - // cxtors & casts - RegExpr() = default ; - RegExpr( ::string const& pattern , bool fast=false , bool no_groups=false ) : - ::regex{ pattern , - /**/ ::regex::ECMAScript - | (fast ? ::regex::optimize : None ) - | (no_groups ? ::regex::nosubs : None ) - } - {} - // services - Match match(::string const& txt) const { - Match res ; - ::regex_match(txt,res,*this) ; + + inline pcre2_match_data* Match::_s_mk_data(RegExpr const& re) { + pcre2_match_data* res = pcre2_match_data_create_from_pattern(re._code,nullptr) ; + SWEAR(pcre2_get_ovector_count(res)>0) ; + pcre2_get_ovector_pointer(res)[0] = PCRE2_UNSET ; return res ; } - bool can_match(::string const& txt) const { - Match m ; - return ::regex_match(txt,m,*this) ; + + inline Match::Match( RegExpr const& re ) : _data{_s_mk_data(re)} {} + inline Match::Match( RegExpr const& re , ::string const& s ) : _data{_s_mk_data(re)} , _subject{s} { + pcre2_match( + re._code + , _cast_in(_subject.c_str()) , _subject.size() , 0/*start_offset*/ + , 0/*options*/ + , _data + , nullptr/*context*/ + ) ; } - } ; + + #else + + struct Match : private ::smatch { + friend class RegExpr ; + // cxtors & casts + private : + using smatch::smatch ; + // accesses + public : + bool operator+() const { return !empty() ; } + bool operator!() const { return !+*this ; } + // + ::string_view operator[](size_t i) const { + ::sub_match sm = smatch::operator[](i) ; + return {sm.first,sm.second} ; + } + } ; + + struct RegExpr : private ::regex { + friend class Match ; + static constexpr ::regex_constants::syntax_option_type None { 0 } ; + // cxtors & casts + RegExpr() = default ; + RegExpr( ::string const& pattern , bool fast=false , bool no_groups=false ) : + ::regex{ pattern , + /**/ ::regex::ECMAScript + | (fast ? ::regex::optimize : None ) + | (no_groups ? ::regex::nosubs : None ) + } + {} + // services + Match match(::string const& txt) const { + Match res ; + ::regex_match(txt,res,*this) ; + return res ; + } + bool can_match(::string const& txt) const { + return +match(txt) ; + } + size_t mark_count() const { + return ::regex::mark_count() ; + } + } ; + + #endif } diff --git a/src/xxhsum.cc b/src/xxhsum.cc index 706a38ba..991f3f87 100644 --- a/src/xxhsum.cc +++ b/src/xxhsum.cc @@ -5,9 +5,12 @@ #include "hash.hh" +#include "re.hh" + using namespace Hash ; int main( int argc , char* argv[] ) { + for( int i=1 ; i2) ::cout <<' '<< argv[i] ;