Skip to content

Commit

Permalink
Merge pull request #2 from inngest/feature/art-prefix-searching
Browse files Browse the repository at this point in the history
Adaptive radix tree aggregate matching
  • Loading branch information
tonyhb authored Jan 4, 2024
2 parents 0ea8ecf + 5693949 commit 49e3d60
Show file tree
Hide file tree
Showing 28 changed files with 3,514 additions and 105 deletions.
37 changes: 37 additions & 0 deletions .github/workflows/go.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# CI workflow: lints the module with golangci-lint and runs the test suite
# under the Go race detector.
name: Go

on:
  push:
    branches: [main]
  pull_request:

jobs:
  golangci:
    name: lint
    strategy:
      matrix:
        os: [ubuntu-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v2
      - name: Set up Go
        uses: actions/setup-go@v2
        with:
          go-version: '1.21'
      - name: Lint
        run: |
          curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s v1.55.1
          ./bin/golangci-lint run --verbose
  test-linux-race:
    strategy:
      matrix:
        os: [ubuntu-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v2
      - name: Set up Go
        uses: actions/setup-go@v2
        with:
          go-version: '1.21'
      - name: Test
        # The job name promises race detection, so -race must actually be
        # passed; without it data races go unreported.
        run: go test -race ./... -v -count=1
113 changes: 113 additions & 0 deletions caching_parser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package expr

import (
"regexp"
"strconv"
"strings"
"sync"
"sync/atomic"

"github.com/google/cel-go/cel"
// "github.com/karlseguin/ccache/v2"
)

var (
	// doubleQuoteMatch matches a double-quoted run of characters that contains
	// no inner double quote. It is compiled once as a package-level
	// initializer so it is never observable as nil and needs no init().
	doubleQuoteMatch = regexp.MustCompile(`"[^"]*"`)

	// replace lists the variable names handed out, in order, to the literals
	// lifted from a single expression. Its length caps how many literals can
	// be lifted.
	replace = []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"}
)

// NewCachingParser constructs a CELParser that parses with env. Before
// parsing, quoted literals are lifted out of the expression into variables so
// that expressions differing only in their literals share one cache entry,
// which makes repeated parsing cheaper.
func NewCachingParser(env *cel.Env) CELParser {
	parser := new(cachingParser)
	parser.env = env
	return parser
}

// cachingParser is the CELParser returned by NewCachingParser. It stores parse
// results in an in-memory map keyed by the literal-lifted expression string
// and counts cache hits and misses.
type cachingParser struct {
	// hits and misses are updated with sync/atomic's 64-bit functions. They
	// are deliberately the first fields of the struct: on 32-bit platforms
	// only the first word of an allocated struct is guaranteed to be 64-bit
	// aligned, and an unaligned 64-bit atomic operation panics.
	hits   int64
	misses int64

	// cache is a global cache of precompiled expressions.
	// cache *ccache.Cache
	stupidNoInternetCache sync.Map

	// env is the CEL environment used to parse expressions on a cache miss.
	env *cel.Env
}

// liftLiterals lifts double-quoted literals out of expr into variables,
// allowing us to normalize expressions to increase cache hit rates.
//
// Each matched literal is replaced, in order of appearance, by VarPrefix
// followed by the next name from replace. The returned map associates that
// name with the literal's unquoted value, or with the raw matched text when
// strconv.Unquote rejects it. At most len(replace) literals are lifted; any
// further literals are left in the expression untouched.
//
// If expr contains an escaped quote (`\"` or `\'`) nothing is lifted: expr is
// returned unchanged together with a nil map.
func liftLiterals(expr string) (string, map[string]any) {
	// TODO: Optimize this please. Use strconv.Unquote as the basis, and perform
	// searches across each index quotes.

	// If this contains an escape sequence (eg. `\"` or `\'`), skip the lifting
	// of literals out of the expression: the simple quote-matching regexp
	// cannot tell an escaped quote from a terminating one.
	if strings.Contains(expr, `\"`) || strings.Contains(expr, `\'`) {
		return expr, nil
	}

	var (
		counter int
		vars    = map[string]any{}
	)

	rewrite := func(str string) string {
		// Every available variable name has been used; leave the remaining
		// literals in place. This must be >= (not >): counter == len(replace)
		// is already out of range for the index below.
		if counter >= len(replace) {
			return str
		}

		idx := replace[counter]
		// Store the unquoted value when possible; otherwise keep the raw text.
		if val, err := strconv.Unquote(str); err == nil {
			str = val
		}
		vars[idx] = str

		counter++
		return VarPrefix + idx
	}

	expr = doubleQuoteMatch.ReplaceAllStringFunc(expr, rewrite)
	return expr, vars
}

// Parse lifts quoted literals out of expr and returns the parsed AST and
// issues for the normalized expression, along with the lifted variables.
// Results are served from the cache when the normalized expression has been
// seen before (counted as a hit); otherwise the expression is parsed with the
// CEL environment, stored, and counted as a miss.
func (c *cachingParser) Parse(expr string) (*cel.Ast, *cel.Issues, map[string]any) {
	normalized, vars := liftLiterals(expr)

	// TODO: ccache, when I have internet.
	entry, found := c.stupidNoInternetCache.Load(normalized)
	if !found {
		ast, issues := c.env.Parse(normalized)

		// Issues are cached alongside the AST, so a failed parse is also
		// remembered.
		parsed := ParsedCelExpr{
			Expr:   normalized,
			AST:    ast,
			Issues: issues,
		}
		c.stupidNoInternetCache.Store(normalized, parsed)

		atomic.AddInt64(&c.misses, 1)
		return ast, issues, vars
	}

	parsed := entry.(ParsedCelExpr)
	atomic.AddInt64(&c.hits, 1)
	return parsed.AST, parsed.Issues, vars
}

// Hits returns the current value of the cache-hit counter, read atomically.
func (c *cachingParser) Hits() int64 {
	count := atomic.LoadInt64(&c.hits)
	return count
}

// Misses returns the current value of the cache-miss counter, read atomically.
func (c *cachingParser) Misses() int64 {
	count := atomic.LoadInt64(&c.misses)
	return count
}

// ParsedCelExpr bundles an expression string with the result of parsing it.
// cachingParser.Parse stores values of this type in its cache.
type ParsedCelExpr struct {
// Expr is the expression text that was parsed. When stored by
// cachingParser.Parse this is the expression after literal lifting.
Expr string
// AST is the AST returned by the CEL environment's Parse for Expr.
AST *cel.Ast
// Issues holds the issues returned by the same Parse call.
Issues *cel.Issues
}
140 changes: 140 additions & 0 deletions caching_parser_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
package expr

import (
"testing"

"github.com/google/cel-go/cel"
"github.com/stretchr/testify/require"
)

// TestCachingParser_CachesSame checks that parsing the same expression twice
// is served from the cache, and that an expression referencing a different
// field is parsed afresh. The subtests share one parser and run in order, so
// the hit/miss counts accumulate across them.
func TestCachingParser_CachesSame(t *testing.T) {
c := cachingParser{env: newEnv()}

// a and b use the same literal but compare different fields.
a := `event.data.a == "cache"`
b := `event.data.b == "cache"`

// Results of the most recent Parse call, carried between subtests.
var (
prevAST *cel.Ast
prevIssues *cel.Issues
prevVars map[string]any
)

t.Run("With an uncached expression", func(t *testing.T) {
prevAST, prevIssues, prevVars = c.Parse(a)
require.NotNil(t, prevAST)
require.Nil(t, prevIssues)
require.NotNil(t, prevVars)
require.EqualValues(t, 0, c.Hits())
require.EqualValues(t, 1, c.Misses())
})

t.Run("With a cached expression", func(t *testing.T) {
ast, issues, vars := c.Parse(a)
require.NotNil(t, ast)
require.Nil(t, issues)

// The cached parse must return the same results as the first parse.
require.Equal(t, prevAST, ast)
require.Equal(t, prevIssues, issues)
require.Equal(t, prevVars, vars)

require.EqualValues(t, 1, c.Hits())
require.EqualValues(t, 1, c.Misses())
})

t.Run("With another uncached expression", func(t *testing.T) {
prevAST, prevIssues, prevVars = c.Parse(b)
require.NotNil(t, prevAST)
require.Nil(t, prevIssues)
// This misses the cache: the referenced field changed (event.data.a ->
// event.data.b), not merely the quoted literal, so the normalized
// expression differs.
require.EqualValues(t, 1, c.Hits())
require.EqualValues(t, 2, c.Misses())
})
}

// TestCachingParser_CacheIgnoreLiterals_Unescaped checks that two expressions
// which differ only in their (unescaped) quoted literals share a single cache
// entry. The subtests share one parser and run in order, so the hit/miss
// counts accumulate across them.
func TestCachingParser_CacheIgnoreLiterals_Unescaped(t *testing.T) {
c := cachingParser{env: newEnv()}

// a and b have identical structure; only the quoted literals differ.
a := `event.data.a == "literal-a" && event.data.b == "yes-1"`
b := `event.data.a == "literal-b" && event.data.b == "yes-2"`

// Results of the most recent Parse call, carried between subtests.
var (
prevAST *cel.Ast
prevIssues *cel.Issues
prevVars map[string]any
)

t.Run("With an uncached expression", func(t *testing.T) {
prevAST, prevIssues, prevVars = c.Parse(a)
require.NotNil(t, prevAST)
require.Nil(t, prevIssues)
require.EqualValues(t, 0, c.Hits())
require.EqualValues(t, 1, c.Misses())
})

t.Run("With a cached expression", func(t *testing.T) {
ast, issues, vars := c.Parse(a)
require.NotNil(t, ast)
require.Nil(t, issues)

// The cached parse must return the same results as the first parse.
require.Equal(t, prevAST, ast)
require.Equal(t, prevIssues, issues)
require.Equal(t, prevVars, vars)

require.EqualValues(t, 1, c.Hits())
require.EqualValues(t, 1, c.Misses())
})

t.Run("With a cached expression having different literals ONLY", func(t *testing.T) {
prevAST, prevIssues, _ = c.Parse(b)
require.NotNil(t, prevAST)
require.Nil(t, prevIssues)
// This hits the cache: only the literals differ, and those are lifted
// out of the expression before the cache lookup.
require.EqualValues(t, 2, c.Hits())
require.EqualValues(t, 1, c.Misses())
})
}

/*
func TestCachingParser_CacheIgnoreLiterals_Escaped(t *testing.T) {
return
c := cachingParser{env: newEnv()}
a := `event.data.a == "literal\"-a" && event.data.b == "yes"`
b := `event.data.a == "literal\"-b" && event.data.b == "yes"`
var (
prevAST *cel.Ast
prevIssues *cel.Issues
)
t.Run("With an uncached expression", func(t *testing.T) {
prevAST, prevIssues = c.Parse(a)
require.NotNil(t, prevAST)
require.Nil(t, prevIssues)
require.EqualValues(t, 0, c.Hits())
require.EqualValues(t, 1, c.Misses())
})
t.Run("With a cached expression", func(t *testing.T) {
ast, issues := c.Parse(a)
require.NotNil(t, ast)
require.Nil(t, issues)
require.Equal(t, prevAST, ast)
require.Equal(t, prevIssues, issues)
require.EqualValues(t, 1, c.Hits())
require.EqualValues(t, 1, c.Misses())
})
t.Run("With a cached expression having different literals ONLY", func(t *testing.T) {
prevAST, prevIssues = c.Parse(b)
require.NotNil(t, prevAST)
require.Nil(t, prevIssues)
// This misses the cache.
require.EqualValues(t, 2, c.Hits())
require.EqualValues(t, 1, c.Misses())
})
}
*/
Loading

0 comments on commit 49e3d60

Please sign in to comment.