Skip to content

Commit

Permalink
chore: do not re-create scrapers each time
Browse files Browse the repository at this point in the history
Signed-off-by: mudler <[email protected]>
  • Loading branch information
mudler committed Nov 26, 2024
1 parent 92327fa commit 84a8179
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 7 deletions.
23 changes: 23 additions & 0 deletions internal/jobserver/jobserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (

"github.com/google/uuid"
"github.com/masa-finance/tee-worker/api/types"
"github.com/masa-finance/tee-worker/internal/jobs"
)

type JobServer struct {
Expand All @@ -16,13 +17,35 @@ type JobServer struct {

results map[string]types.JobResult
jobConfiguration types.JobConfiguration

jobWorkers map[string]*jobWorkerEntry
}

// jobWorkerEntry pairs a long-lived worker with a mutex, so that callers can
// serialize access to the worker by locking the entry before using it.
type jobWorkerEntry struct {
// w is the worker instance held by this entry.
w worker
// Embedded so that Lock and Unlock can be called directly on the entry.
// Because it contains a mutex, an entry must be handled by pointer and
// never copied.
sync.Mutex
}

func NewJobServer(workers int, jc types.JobConfiguration) *JobServer {
if workers == 0 {
workers++
}

jobworkers := make(map[string]*jobWorkerEntry)

for _, t := range []string{jobs.WebScraperType, jobs.TwitterScraperType} {
switch t {
case jobs.WebScraperType:
jobworkers[jobs.WebScraperType] = &jobWorkerEntry{
w: jobs.NewWebScraper(jc),
}
case jobs.TwitterScraperType:
jobworkers[jobs.TwitterScraperType] = &jobWorkerEntry{
w: jobs.NewTwitterScraper(jc),
}
}
}

return &JobServer{
jobChan: make(chan types.Job),
results: make(map[string]types.JobResult),
Expand Down
18 changes: 11 additions & 7 deletions internal/jobserver/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"fmt"

"github.com/masa-finance/tee-worker/api/types"
"github.com/masa-finance/tee-worker/internal/jobs"
)

func (js *JobServer) worker(c context.Context) {
Expand All @@ -27,11 +26,10 @@ type worker interface {
}

func (js *JobServer) doWork(j types.Job) error {
var w worker
switch j.Type {
case jobs.WebScraperType:
w = jobs.NewWebScraper(js.jobConfiguration)
default:
// TODO: add sync.Mutex for accessing jobWorkers
w, exists := js.jobWorkers[j.Type]

if !exists {
js.Lock()
js.results[j.UUID] = types.JobResult{
Error: fmt.Sprintf("unknown job type: %s", j.Type),
Expand All @@ -40,7 +38,13 @@ func (js *JobServer) doWork(j types.Job) error {
return fmt.Errorf("unknown job type: %s", j.Type)
}

result, err := w.ExecuteJob(j)
// XXX: Should we lock the shared worker or create a new instance for each job?
// The behavior is not defined yet: the only requirement we have so far is that
// some scrapers might be rate limited, so we don't want to create a new client
// every time (?)
w.Lock()
defer w.Unlock()
result, err := w.w.ExecuteJob(j)
if err != nil {
result.Error = err.Error()
}
Expand Down

0 comments on commit 84a8179

Please sign in to comment.